Diffstat (limited to 'src')
-rw-r--r--  src/H5AC.c          2106
-rw-r--r--  src/H5ACprivate.h      4
-rw-r--r--  src/H5C.c           1053
-rw-r--r--  src/H5Cpkg.h          39
-rw-r--r--  src/H5Cprivate.h      71
-rw-r--r--  src/H5D.c              2
-rw-r--r--  src/H5F.c             62
-rw-r--r--  src/H5FD.c             2
-rw-r--r--  src/H5FDmpio.c        38
-rw-r--r--  src/H5FDmpiposix.c    34
-rw-r--r--  src/H5FDmulti.c        9
-rw-r--r--  src/H5Fprivate.h       1
-rw-r--r--  src/H5HL.c           189
-rw-r--r--  src/H5HLpkg.h          1
-rw-r--r--  src/H5MF.c             8
-rw-r--r--  src/H5MFprivate.h      2
-rw-r--r--  src/H5O.c            746
-rw-r--r--  src/H5Oprivate.h       6
18 files changed, 3972 insertions, 401 deletions
diff --git a/src/H5AC.c b/src/H5AC.c
index d4b70d2..eff9137 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -42,18 +42,24 @@
*-------------------------------------------------------------------------
*/
+#define H5C_PACKAGE /*suppress error about including H5Cpkg */
#define H5F_PACKAGE /*suppress error about including H5Fpkg */
/* Interface initialization */
#define H5_INTERFACE_INIT_FUNC H5AC_init_interface
+#ifdef H5_HAVE_PARALLEL
+#include <mpi.h>
+#endif /* H5_HAVE_PARALLEL */
#include "H5private.h" /* Generic Functions */
#include "H5ACprivate.h" /* Metadata cache */
+#include "H5Cpkg.h" /* Cache */
#include "H5Dprivate.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
#include "H5FDprivate.h" /* File drivers */
+#include "H5FLprivate.h" /* Free Lists */
#include "H5Iprivate.h" /* IDs */
#include "H5Pprivate.h" /* Property lists */
@@ -62,6 +68,299 @@
#include "H5FPprivate.h" /* Flexible PHDF5 */
#endif /* H5_HAVE_FPHDF5 */
+#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0
+
+/****************************************************************************
+ *
+ * structure H5AC_aux_t
+ *
+ * While H5AC has become a wrapper for the cache implemented in H5C.c, there
+ * are some features of the metadata cache that are specific to it, and which
+ * therefore do not belong in the more generic H5C cache code.
+ *
+ * In particular, there is the matter of synchronizing writes from the
+ * metadata cache to disk in the PHDF5 case.
+ *
+ * Prior to this update, the presumption was that all metadata caches would
+ * write the same data at the same time since all operations modifying
+ * metadata must be performed collectively. Given this assumption, it was
+ * safe to allow only the writes from process 0 to actually make it to disk,
+ * while metadata writes from all other processes were discarded.
+ *
+ * Unfortunately, this presumption is in error as operations that read
+ * metadata need not be collective, but can change the location of dirty
+ * entries in the metadata cache LRU lists. This can result in the same
+ * metadata write operation triggering writes from the metadata caches on
+ * some processes, but not all (causing a hang), or in different sets of
+ * entries being written from different caches (potentially resulting in
+ * metadata corruption in the file).
+ *
+ * To deal with this issue, I decided to apply a paradigm shift to the way
+ * metadata is written to disk.
+ *
+ * With this set of changes, only the metadata cache on process 0 is able
+ * to write metadata to disk, although metadata caches on all other
+ * processes can read metadata from disk as before.
+ *
+ * To keep all the other caches from getting plugged up with dirty metadata,
+ * process 0 periodically broadcasts a list of entries that it has flushed
+ * since the last notice, and which are currently clean. The other caches
+ * mark these entries as clean as well, which allows them to evict the
+ * entries as needed.
+ *
+ * One obvious problem in this approach is synchronizing the broadcasts
+ * and receptions, as different caches may see different amounts of
+ * activity.
+ *
+ * The current solution is for the caches to track the number of bytes
+ * of newly generated dirty metadata, and to broadcast and receive
+ * whenever this value exceeds some user specified threshold.
+ *
+ * Maintaining this count is easy for all processes other than process 0 --
+ * all that is necessary is to add the size of the entry to the total
+ * whenever there is an insertion, a rename of a previously clean entry,
+ * or whenever a previously clean entry is marked dirty in an unprotect.
+ *
+ * On process 0, we have to be careful not to count dirty bytes twice.
+ * If an entry is marked dirty, flushed, and marked dirty again, all
+ * within a single reporting period, only the first marking should
+ * be added to the dirty bytes generated tally, as that is all that
+ * the other processes will see.
+ *
+ * At present, this structure exists to maintain the fields needed to
+ * implement the above scheme, and thus is only used in the parallel
+ * case. However, other uses may arise in the future.
+ *
+ * Instances of this structure are associated with metadata caches via
+ * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is
+ * responsible for allocating, maintaining, and discarding instances
+ * of H5AC_aux_t.
+ *
+ * The remainder of this header comment documents the individual fields
+ * of the structure.
+ *
+ * JRM - 6/27/05
+ *
+ * magic: Unsigned 32 bit integer always set to
+ * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate
+ * pointers to instances of H5AC_aux_t.
+ *
+ * mpi_comm: MPI communicator associated with the file for which the
+ * cache has been created.
+ *
+ * mpi_rank: MPI rank of this process within mpi_comm.
+ *
+ * mpi_size: Number of processes in mpi_comm.
+ *
+ * write_permitted: Boolean flag used to control whether the cache
+ * is permitted to write to file.
+ *
+ * dirty_bytes_threshold: Integer field containing the dirty bytes
+ * generation threshold. Whenever dirty byte creation
+ * exceeds this value, the metadata cache on process 0
+ * broadcasts a list of the entries it has flushed since
+ * the last broadcast (or since the beginning of execution)
+ * and which are currently clean (if they are still in the
+ * cache).
+ *
+ * Similarly, metadata caches on processes other than process
+ * 0 will attempt to receive a list of clean entries whenever
+ * the threshold is exceeded.
+ *
+ * dirty_bytes: Integer field containing the number of bytes of dirty
+ * metadata generated since the beginning of the computation,
+ * or (more typically) since the last clean entries list
+ * broadcast. This field is reset to zero after each such
+ * broadcast.
+ *
+ * dirty_bytes_propagations: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times the cleaned list
+ * has been propagated from process 0 to the other
+ * processes.
+ *
+ * unprotect_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via unprotect operations since the last time the cleaned
+ * list was propagated.
+ *
+ * unprotect_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via unprotect operations since the last time
+ * the cleaned list was propagated.
+ *
+ * insert_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via insert operations since the last time the cleaned
+ * list was propagated.
+ *
+ * insert_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via insert operations since the last time
+ * the cleaned list was propagated.
+ *
+ * rename_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via rename operations since the last time the cleaned
+ * list was propagated.
+ *
+ * rename_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via rename operations since the last time
+ * the cleaned list was propagated.
+ *
+ * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
+ * of entries that have been dirtied since the last time they
+ * were listed in a clean entries broadcast. This list is
+ * only maintained by the metadata cache on process 0 -- it
+ * is used to maintain a view of the dirty entries as seen
+ * by the other caches, so as to keep the dirty bytes count
+ * in synchronization with them.
+ *
+ * Thus on process 0, the dirty_bytes count is incremented
+ * only if either
+ *
+ * 1) an entry is inserted in the metadata cache, or
+ *
+ * 2) a previously clean entry is renamed, and it does not
+ * already appear in the dirty entry list, or
+ *
+ * 3) a previously clean entry is unprotected with the
+ * dirtied flag set and the entry does not already appear
+ * in the dirty entry list.
+ *
+ * Entries are added to the dirty entry list whenever they cause
+ * the dirty bytes count to be increased. They are removed
+ * when they appear in a clean entries broadcast. Note that
+ * renames must be reflected in the dirty entry list.
+ *
+ * To reiterate, this field is only used on process 0 -- it
+ * should be NULL on all other processes.
+ *
+ * d_slist_len: Integer field containing the number of entries in the
+ * dirty entry list. This field should always contain the
+ * value 0 on all processes other than process 0. It exists
+ * primarily for sanity checking.
+ *
+ * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
+ * of entries that were dirty, have been flushed
+ * to disk since the last clean entries broadcast, and are
+ * still clean. Since only process 0 can write to disk, this
+ * list only exists on process 0.
+ *
+ * In essence, this slist is used to assemble the contents of
+ * the next clean entries broadcast. The list is emptied after
+ * each broadcast.
+ *
+ * c_slist_len: Integer field containing the number of entries in the clean
+ * entries list (*c_slist_ptr). This field should always
+ * contain the value 0 on all processes other than process 0.
+ * It exists primarily for sanity checking.
+ *
+ ****************************************************************************/
+
+#ifdef H5_HAVE_PARALLEL
+
+#define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01
+
+typedef struct H5AC_aux_t
+{
+ uint32_t magic;
+
+ MPI_Comm mpi_comm;
+
+ int mpi_rank;
+
+ int mpi_size;
+
+ hbool_t write_permitted;
+
+ int32_t dirty_bytes_threshold;
+
+ int32_t dirty_bytes;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+
+ int32_t dirty_bytes_propagations;
+
+ int32_t unprotect_dirty_bytes;
+ int32_t unprotect_dirty_bytes_updates;
+
+ int32_t insert_dirty_bytes;
+ int32_t insert_dirty_bytes_updates;
+
+ int32_t rename_dirty_bytes;
+ int32_t rename_dirty_bytes_updates;
+
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ H5SL_t * d_slist_ptr;
+
+ int32_t d_slist_len;
+
+ H5SL_t * c_slist_ptr;
+
+ int32_t c_slist_len;
+
+} H5AC_aux_t; /* struct H5AC_aux_t */
+
+/* Declare a free list to manage the H5AC_aux_t struct */
+H5FL_DEFINE_STATIC(H5AC_aux_t);
+
+#endif /* H5_HAVE_PARALLEL */
+
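For orientation, here is a minimal stand-alone sketch (an editor's illustration, not code from this patch) of the dirty-byte / clean-list cycle described in the H5AC_aux_t comment above. Every sketch_* name is hypothetical, haddr_t values are modeled as long long, and the 256 KB threshold simply mirrors the default assigned in H5AC_create() further down in this diff.

#include <mpi.h>
#include <stdlib.h>
#include <string.h>

#define SKETCH_DIRTY_BYTES_THRESHOLD (256 * 1024)

typedef struct sketch_cache_t {
    MPI_Comm comm;
    int      rank;
    size_t   dirty_bytes;             /* dirty bytes since the last sync */
} sketch_cache_t;

/* all ranks count newly generated dirty metadata the same way */
static void sketch_note_dirty(sketch_cache_t *c, size_t entry_size)
{
    c->dirty_bytes += entry_size;
}

/* collective: rank 0 ships the addresses of entries it has flushed and
 * that are still clean; the other ranks receive them so the matching
 * entries can be marked clean and evicted.  Everyone then starts a new
 * reporting period. */
static int sketch_propagate(sketch_cache_t *c,
                            const long long *flushed_addrs, int n_flushed)
{
    int        n   = (c->rank == 0) ? n_flushed : 0;
    long long *buf = NULL;

    if (MPI_Bcast(&n, 1, MPI_INT, 0, c->comm) != MPI_SUCCESS)
        return -1;

    if (n > 0) {
        if (NULL == (buf = malloc((size_t)n * sizeof(long long))))
            return -1;
        if (c->rank == 0)
            memcpy(buf, flushed_addrs, (size_t)n * sizeof(long long));

        if (MPI_Bcast(buf, n, MPI_LONG_LONG, 0, c->comm) != MPI_SUCCESS) {
            free(buf);
            return -1;
        }
        /* ranks other than 0 would now mark buf[0..n-1] clean */
        free(buf);
    }

    c->dirty_bytes = 0;
    return 0;
}

/* called after an insert, rename, or dirtying unprotect.  Because metadata
 * modifications are collective, all ranks cross the threshold at the same
 * point, so the collective propagation below is safe. */
static int sketch_after_modify(sketch_cache_t *c,
                               const long long *flushed_addrs, int n_flushed)
{
    if (c->dirty_bytes >= SKETCH_DIRTY_BYTES_THRESHOLD)
        return sketch_propagate(c, flushed_addrs, n_flushed);
    return 0;
}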
+/****************************************************************************
+ *
+ * structure H5AC_slist_entry_t
+ *
+ * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t
+ * and the cleaned entry list maintained via the c_slist_ptr field of
+ * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned
+ * entries. Unfortunately, the slist code makes us define a dynamically
+ * allocated structure to store these offsets in. This structure serves
+ * that purpose. Its fields are as follows:
+ *
+ * magic: Unsigned 32 bit integer always set to
+ * H5AC__H5AC_SLIST_ENTRY_T_MAGIC. This field is used to
+ * validate pointers to instances of H5AC_slist_entry_t.
+ *
+ * addr: file offset of a metadata entry. Entries are added to this
+ * list (if they aren't there already) when they are marked
+ * dirty in an unprotect, inserted, or renamed. They are
+ * removed when they appear in a clean entries broadcast.
+ *
+ ****************************************************************************/
+
+#ifdef H5_HAVE_PARALLEL
+
+#define H5AC__H5AC_SLIST_ENTRY_T_MAGIC 0x00D0A02
+
+typedef struct H5AC_slist_entry_t
+{
+ uint32_t magic;
+
+ haddr_t addr;
+} H5AC_slist_entry_t;
+
+/* Declare a free list to manage the H5AC_slist_entry_t struct */
+H5FL_DEFINE_STATIC(H5AC_slist_entry_t);
+
+#endif /* H5_HAVE_PARALLEL */
+
+
/*
* Private file-scope variables.
*/
@@ -90,6 +389,55 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f,
hid_t dxpl_id,
hbool_t * write_permitted_ptr);
+#ifdef H5_HAVE_PARALLEL
+static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr);
+
+static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ unsigned int flags);
+
+static herr_t H5AC_log_dirtied_entry(H5AC_t * cache_ptr,
+ H5C_cache_entry_t * entry_ptr,
+ haddr_t addr,
+ hbool_t size_changed,
+ size_t new_size);
+
+static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id);
+
+#if 0 /* this is useful debugging code -- JRM */
+static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id);
+#endif /* JRM */
+
+static herr_t H5AC_log_inserted_entry(H5F_t * f,
+ H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ const H5AC_class_t * type,
+ haddr_t addr);
+
+static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr,
+ hbool_t do_barrier);
+
+static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5AC_t * cache_ptr);
+
+static herr_t H5AC_log_renamed_entry(H5AC_t * cache_ptr,
+ haddr_t old_addr,
+ haddr_t new_addr);
+#endif /* H5_HAVE_PARALLEL */
+
/*-------------------------------------------------------------------------
* Function: H5AC_init
@@ -351,6 +699,13 @@ H5AC_term_interface(void)
* through the function.
* JRM - 4/7/05
*
+ * Added code allocating and initializing the auxiliary
+ * structure (an instance of H5AC_aux_t), and linking it
+ * to the instance of H5C_t created by H5C_create(). At
+ * present, the auxiliary structure is only used in PHDF5.
+ *
+ * JRM - 6/28/05
+ *
*-------------------------------------------------------------------------
*/
@@ -376,6 +731,12 @@ H5AC_create(const H5F_t *f,
{
herr_t ret_value = SUCCEED; /* Return value */
herr_t result;
+#ifdef H5_HAVE_PARALLEL
+ MPI_Comm mpi_comm = MPI_COMM_NULL;
+ int mpi_rank = -1;
+ int mpi_size = -1;
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_create, FAIL)
@@ -390,15 +751,138 @@ H5AC_create(const H5F_t *f,
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration");
}
- /* The default max cache size and min clean size will frequently be
- * overwritten shortly by the subsequent set resize config call.
- * -- JRM
- */
- f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
- H5AC__DEFAULT_MIN_CLEAN_SIZE,
- (H5AC_NTYPES - 1),
- (const char **)H5AC_entry_type_names,
- H5AC_check_if_write_permitted);
+#ifdef H5_HAVE_PARALLEL
+ if ( IS_H5FD_MPI(f) ) {
+
+ if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \
+ "can't get MPI communicator")
+ }
+
+ if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank")
+ }
+
+ if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size")
+ }
+
+ /* There is no point in setting up the auxiliary structure if mpi_size
+ * is less than or equal to 1, as there will never be any processes
+ * to broadcast the clean lists to.
+ */
+ if ( mpi_size > 1 ) {
+
+ if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate H5AC auxilary structure.")
+
+ } else {
+
+ aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC;
+ aux_ptr->mpi_comm = mpi_comm;
+ aux_ptr->mpi_rank = mpi_rank;
+ aux_ptr->mpi_size = mpi_size;
+ aux_ptr->write_permitted = FALSE;
+ aux_ptr->dirty_bytes_threshold = 256 * 1024;
+ aux_ptr->dirty_bytes = 0;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations = 0;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ aux_ptr->d_slist_ptr = NULL;
+ aux_ptr->d_slist_len = 0;
+ aux_ptr->c_slist_ptr = NULL;
+ aux_ptr->c_slist_len = 0;
+ }
+
+ if ( mpi_rank == 0 ) {
+
+ aux_ptr->d_slist_ptr =
+ H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16);
+
+ if ( aux_ptr->d_slist_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
+ "can't create dirtied entry list.")
+ }
+
+ aux_ptr->c_slist_ptr =
+ H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16);
+
+ if ( aux_ptr->c_slist_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
+ "can't create cleaned entry list.")
+ }
+ }
+ }
+
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ H5AC_log_flushed_entry,
+ (void *)aux_ptr);
+
+ } else {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ NULL,
+ FALSE,
+#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */
+ H5AC_log_flushed_entry_dummy,
+#else /* JRM */
+ NULL,
+#endif /* JRM */
+ (void *)aux_ptr);
+ }
+ } else {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ NULL,
+ NULL);
+ }
+ } else {
+#endif /* H5_HAVE_PARALLEL */
+ /* The default max cache size and min clean size will frequently be
+ * overwritten shortly by the subsequent set resize config call.
+ * -- JRM
+ */
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ NULL,
+ NULL);
+#ifdef H5_HAVE_PARALLEL
+ }
+#endif /* H5_HAVE_PARALLEL */
if ( NULL == f->shared->cache ) {
@@ -416,6 +900,31 @@ H5AC_create(const H5F_t *f,
done:
+#ifdef H5_HAVE_PARALLEL
+
+ /* if there is a failure, try to tidy up the auxiliary structure */
+
+ if ( ret_value != SUCCEED ) {
+
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->d_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->d_slist_ptr);
+ }
+
+ if ( aux_ptr->c_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->c_slist_ptr);
+ }
+
+ aux_ptr->magic = 0;
+ H5FL_FREE(H5AC_aux_t, aux_ptr);
+ aux_ptr = NULL;
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC_create() */
@@ -445,6 +954,10 @@ done:
*
* JRM - 6/7/04
*
+ * Added code to free the auxiliary structure and its
+ * associated slist if present.
+ * JRM - 6/28/05
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -452,12 +965,23 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
{
H5AC_t *cache = NULL;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_dest, FAIL)
assert(f);
assert(f->shared->cache);
cache = f->shared->cache;
+#ifdef H5_HAVE_PARALLEL
+ aux_ptr = cache->aux_ptr;
+
+ if ( aux_ptr != NULL ) {
+
+ HDassert ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ }
+#endif /* H5_HAVE_PARALLEL */
f->shared->cache = NULL;
@@ -466,6 +990,25 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "can't destroy cache")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->d_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->d_slist_ptr);
+ }
+
+ if ( aux_ptr->c_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->c_slist_ptr);
+ }
+
+ aux_ptr->magic = 0;
+ H5FL_FREE(H5AC_aux_t, aux_ptr);
+ aux_ptr = NULL;
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -529,28 +1072,103 @@ done:
*
* Complete re-write. See above for details. -- JRM 5/11/04
*
- * Abstracted the guts of the function to H5C_dest() in H5C.c,
- * and then re-wrote the function as a wrapper for H5C_dest().
+ * Abstracted the guts of the function to H5C_flush_cache()
+ * in H5C.c, and then re-wrote the function as a wrapper for
+ * H5C_flush_cache().
*
* JRM - 6/7/04
*
+ * JRM - 7/5/05
+ * Modified function as part of a fix for a cache coherency
+ * bug in PHDF5. See the header comments on the H5AC_aux_t
+ * structure for details.
+ *
*-------------------------------------------------------------------------
*/
herr_t
H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags)
{
- herr_t status;
- herr_t ret_value=SUCCEED; /* Return value */
+ herr_t status;
+ herr_t ret_value = SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+ int mpi_code;
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_ENTER_NOAPI(H5AC_flush, FAIL)
HDassert(f);
HDassert(f->shared->cache);
- status = H5C_flush_cache(f,
- dxpl_id,
- H5AC_noblock_dxpl_id,
- f->shared->cache,
+#ifdef H5_HAVE_PARALLEL
+ aux_ptr = f->shared->cache->aux_ptr;
+
+ if ( aux_ptr != NULL ) {
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ HDfprintf(stdout,
+ "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
+ (int)(aux_ptr->mpi_rank),
+ (int)(aux_ptr->unprotect_dirty_bytes),
+ (int)(aux_ptr->unprotect_dirty_bytes_updates),
+ (int)(aux_ptr->insert_dirty_bytes),
+ (int)(aux_ptr->insert_dirty_bytes_updates),
+ (int)(aux_ptr->rename_dirty_bytes),
+ (int)(aux_ptr->rename_dirty_bytes_updates));
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we start the flush. Hence the following
+ * barrier.
+ */
+ if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+ }
+
+ /* if the clear only flag is set, this flush will not involve any
+ * disk I/O. In such cases, it is not necessary to let process 0
+ * flush first.
+ */
+ if ( ( aux_ptr->mpi_rank == 0 ) &&
+ ( (flags & H5AC__FLUSH_CLEAR_ONLY_FLAG) != 0 ) ) {
+
+ unsigned init_flush_flags = H5AC__NO_FLAGS_SET;
+
+ if ( ( (flags & H5AC__FLUSH_MARKED_ENTRIES_FLAG) != 0 ) &&
+ ( (flags & H5AC__FLUSH_INVALIDATE_FLAG) == 0 ) ) {
+
+ init_flush_flags |= H5AC__FLUSH_MARKED_ENTRIES_FLAG;
+ }
+
+ aux_ptr->write_permitted = TRUE;
+
+ status = H5C_flush_cache(f,
+ H5AC_noblock_dxpl_id,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ init_flush_flags);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if ( status < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
+ }
+ } /* end if ( aux_ptr->mpi_rank == 0 ) */
+
+ status = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ FALSE);
+ } /* end if ( aux_ptr != NULL ) */
+#endif /* H5_HAVE_PARALLEL */
+
+ status = H5C_flush_cache(f,
+ dxpl_id,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
flags);
if ( status < 0 ) {
@@ -616,8 +1234,16 @@ done:
* moving management of the dirty flag on cache entries into
* the cache code.
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHDF5 cases.
+ *
*-------------------------------------------------------------------------
*/
+
herr_t
H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned int flags)
{
@@ -625,6 +1251,9 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
H5AC_info_t *info;
H5AC_t *cache;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_set, FAIL)
@@ -717,6 +1346,23 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) {
+
+ result = H5AC_log_inserted_entry(f,
+ f->shared->cache,
+ (H5AC_info_t *)thing,
+ type,
+ addr);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \
+ "H5AC_log_inserted_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_insert_entry(f,
dxpl_id,
H5AC_noblock_dxpl_id,
@@ -731,6 +1377,22 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C_insert_entry() failed")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -766,6 +1428,18 @@ done:
* in H5C.c, and then re-wrote the function as a wrapper for
* H5C_rename_entry().
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHDF5 cases.
+ *
+ * Note that this code presumes that the renamed entry will
+ * be present in all caches -- which it must be at present.
+ * To maintain this invarient, only rename entries immediately
+ * after you unprotect them.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -773,6 +1447,9 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
{
herr_t result;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_rename, FAIL)
@@ -808,6 +1485,13 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
* In any case, don't check this code in without revisiting this
* issue.
* JRM -- 6/6/05
+ *
+ * On reflection, the code was already broken, as there was no
+ * way to advise the SAP that a renamed entry had changed its
+ * address, or was dirty. I will not worry about it for now,
+ * but the matter must be addressed if we ever get serious
+ * about FPHDF5.
+ * JRM -- 7/5/05
*/
HGOTO_DONE(SUCCEED);
@@ -816,6 +1500,21 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) {
+
+ result = H5AC_log_renamed_entry(f->shared->cache,
+ old_addr,
+ new_addr);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_renamed_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_rename_entry(f->shared->cache,
type,
old_addr,
@@ -827,6 +1526,22 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
"H5C_rename_entry() failed.")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -837,7 +1552,7 @@ done:
/*-------------------------------------------------------------------------
* Function: H5AC_protect
*
- * Purpose: If the target entry is not in the cache, load it. If
+ * Purpose: If the target entry is not in the cache, load it. If
* necessary, attempt to evict one or more entries to keep
* the cache within its maximum size.
*
@@ -845,8 +1560,8 @@ done:
* to the caller. The caller must call H5AC_unprotect() when
* finished with the entry.
*
- * While it is protected, the entry may not be either evicted
- * or flushed -- nor may it be accessed by another call to
+ * While it is protected, the entry may not be either evicted
+ * or flushed -- nor may it be accessed by another call to
* H5AC_protect. Any attempt to do so will result in a failure.
*
* This comment is a re-write of the original Purpose: section.
@@ -887,8 +1602,8 @@ done:
* Purpose section above.
*
* JRM - 6/7/04
- * Abstracted the guts of the function to H5C_protect()
- * in H5C.c, and then re-wrote the function as a wrapper for
+ * Abstracted the guts of the function to H5C_protect()
+ * in H5C.c, and then re-wrote the function as a wrapper for
* H5C_protect().
*
*-------------------------------------------------------------------------
@@ -1108,6 +1823,18 @@ done:
* part of a collection of changes directed at moving
* management of cache entry dirty flags into the H5C code.
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHDF5 cases.
+ *
+ * JRM - 9/8/05
+ * Added code to track entry size changes. This is necessary
+ * as it can affect dirty byte creation counts, thereby
+ * throwing the caches out of sync in the PHDF5 case.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -1116,6 +1843,12 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
{
herr_t result;
herr_t ret_value=SUCCEED; /* Return value */
+ hbool_t size_changed = FALSE;
+ hbool_t dirtied;
+ size_t new_size = 0;
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_unprotect, FAIL)
@@ -1129,6 +1862,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
HDassert( ((H5AC_info_t *)thing)->addr == addr );
HDassert( ((H5AC_info_t *)thing)->type == type );
+ dirtied = ((flags & H5AC__DIRTIED_FLAG) == H5AC__DIRTIED_FLAG );
+
+ if ( dirtied ) {
+
+ if ( (type->size)(f, thing, &new_size) < 0 ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \
+ "Can't get size of thing")
+ }
+
+ if ( ((H5AC_info_t *)thing)->size != new_size ) {
+
+ size_changed = TRUE;
+ flags = flags | H5AC__SIZE_CHANGED_FLAG;
+ }
+ }
+
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_FPHDF5
/* The following code to support flexible parallel is a direct copy
@@ -1202,6 +1952,40 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) &&
+ ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) {
+
+ result = H5AC_log_dirtied_entry(f->shared->cache,
+ (H5AC_info_t *)thing,
+ addr,
+ size_changed,
+ new_size);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_dirtied_entry() failed.")
+ }
+ }
+
+ if ( ( (flags & H5C__DELETED_FLAG) != 0 ) &&
+ ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) &&
+ ( aux_ptr->mpi_rank == 0 ) ) {
+
+ result = H5AC_log_deleted_entry(f->shared->cache,
+ (H5AC_info_t *)thing,
+ addr,
+ flags);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_deleted_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_unprotect(f,
dxpl_id,
H5AC_noblock_dxpl_id,
@@ -1209,7 +1993,8 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
type,
addr,
thing,
- flags);
+ flags,
+ new_size);
if ( result < 0 ) {
@@ -1217,6 +2002,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
"H5C_unprotect() failed.")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -1686,6 +2488,184 @@ done:
/*-------------------------------------------------------------------------
*
+ * Function: H5AC_broadcast_clean_list()
+ *
+ * Purpose: Broadcast the contents of the process 0 cleaned entry
+ * slist. In passing, also remove all entries from said
+ * list, and also remove any matching entries from the dirtied
+ * slist.
+ *
+ * This function must only be called by the process with
+ * MPI_rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/1/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ haddr_t addr;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5SL_node_t * slist_node_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ MPI_Offset * buf_ptr = NULL;
+ size_t buf_size;
+ int i = 0;
+ int mpi_result;
+ int num_entries;
+
+ FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+ HDassert( H5SL_count(aux_ptr->c_slist_ptr) ==
+ (size_t)(aux_ptr->c_slist_len) );
+
+
+ /* First broadcast the number of entries in the list so that the
+ * receivers can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ num_entries = aux_ptr->c_slist_len;
+
+ mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
+
+ }
+
+ if ( num_entries > 0 )
+ {
+ /* allocate a buffer to store the list of entry base addresses in */
+
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+
+ buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size);
+
+ if ( buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for clean entry buffer")
+ }
+
+ /* now load the entry base addresses into the buffer, emptying the
+ * cleaned entry list in passing
+ */
+
+ while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) )
+ {
+ slist_entry_ptr = H5SL_item(slist_node_ptr);
+
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+
+ HDassert( i < num_entries );
+
+ addr = slist_entry_ptr->addr;
+
+ if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) {
+
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
+ "can't convert from haddr to MPI off")
+ }
+
+ i++;
+
+ /* now remove the entry from the cleaned entry list */
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+
+ /* and also remove the matching entry from the dirtied list
+ * if it exists.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
+ }
+
+ } /* while */
+
+
+ /* Now broadcast the list of cleaned entries -- if there is one.
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+
+ mpi_result = MPI_Bcast((void *)buf_ptr, (int)buf_size, MPI_BYTE, 0,
+ aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
+ }
+ }
+
+done:
+
+ if ( buf_ptr != NULL ) {
+
+ buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_broadcast_clean_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
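For reference, a minimal sketch (an editor's illustration, not code from this patch) of the receive side that has to pair with the two MPI_Bcast calls in H5AC_broadcast_clean_list() above; the real counterpart is H5AC_receive_and_apply_clean_list(), declared earlier but not shown in these hunks. The sketch_off_t stand-in for MPI_Offset and the sketch_mark_clean() helper are illustrative assumptions, not the actual API.

#include <mpi.h>
#include <stdlib.h>

typedef long long sketch_off_t;       /* stand-in for MPI_Offset */

/* stand-in: the real code hands the received addresses back to the H5C
 * cache so the entries at those addresses are marked clean */
static void sketch_mark_clean(sketch_off_t addr)
{
    (void)addr;
}

static int sketch_receive_clean_list(MPI_Comm comm)
{
    int           num_entries = 0;
    sketch_off_t *buf         = NULL;
    int           i;

    /* mirrors the first broadcast: how many addresses will follow */
    if (MPI_Bcast(&num_entries, 1, MPI_INT, 0, comm) != MPI_SUCCESS)
        return -1;

    if (num_entries > 0) {
        buf = (sketch_off_t *)malloc((size_t)num_entries * sizeof(*buf));
        if (buf == NULL)
            return -1;

        /* mirrors the second broadcast: same raw-byte element type and
         * the same total size as the sender's buf_size */
        if (MPI_Bcast(buf, (int)((size_t)num_entries * sizeof(*buf)),
                      MPI_BYTE, 0, comm) != MPI_SUCCESS) {
            free(buf);
            return -1;
        }

        for (i = 0; i < num_entries; i++)
            sketch_mark_clean(buf[i]);    /* entry is now evictable */

        free(buf);
    }

    return 0;
}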
+/*-------------------------------------------------------------------------
+ *
* Function: H5AC_check_if_write_permitted
*
* Purpose: Determine if a write is permitted under the current
@@ -1703,6 +2683,16 @@ done:
*
* Modifications:
*
+ * John Mainzer, 9/23/05
+ * Rewrote function to return the value of the
+ * write_permitted field in aux structure if the structure
+ * exists and mpi_rank is 0.
+ *
+ * If the aux structure exists, but mpi_rank isn't 0, the
+ * function now returns FALSE.
+ *
+ * In all other cases, the function returns TRUE.
+ *
*-------------------------------------------------------------------------
*/
@@ -1720,49 +2710,1087 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f,
{
hbool_t write_permitted = TRUE;
herr_t ret_value = SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_ENTER_NOAPI(H5AC_check_if_write_permitted, FAIL)
#ifdef H5_HAVE_PARALLEL
+ HDassert( f != NULL );
+ HDassert( f->shared != NULL );
+ HDassert( f->shared->cache != NULL );
- if ( IS_H5FD_MPI(f) ) {
+ aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr);
+
+ if ( aux_ptr != NULL ) {
+
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
- H5P_genplist_t *dxpl; /* Dataset transfer property list */
- H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode property value */
+ if ( aux_ptr->mpi_rank == 0 ) {
- /* Get the dataset transfer property list */
- if ( NULL == (dxpl = H5I_object(dxpl_id)) ) {
+ write_permitted = aux_ptr->write_permitted;
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \
- "not a dataset creation property list")
+ } else {
+
+ write_permitted = FALSE;
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
+ *write_permitted_ptr = write_permitted;
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_check_if_write_permitted() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_deleted_entry()
+ *
+ * Purpose: Log an entry for which H5C__DELETED_FLAG has been set.
+ *
+ * If mpi_rank is 0, we must make sure that the entry doesn't
+ * appear in the cleaned or dirty entry lists. Otherwise,
+ * we have nothing to do.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_deleted_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ unsigned int flags)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+
+ HDassert( (flags & H5C__DELETED_FLAG) != 0 );
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ /* if the entry appears in the dirtied entry slist, remove it. */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
}
- /* Get the transfer mode property */
- if( H5P_get(dxpl, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0 ) {
+ /* if the entry appears in the cleaned entry slist, remove it. */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, \
- "can't retrieve xfer mode")
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
}
+ }
- if ( xfer_mode == H5FD_MPIO_INDEPENDENT ) {
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
- write_permitted = FALSE;
+} /* H5AC_log_deleted_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_dirtied_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly dirtied entry.
+ *
+ * If mpi_rank isn't 0, this simply means adding the size
+ * of the entry to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must first check to see if the entry
+ * appears in the dirty entries slist. If it does, do nothing.
+ * If it doesn't, add the size to the dirty_bytes count, add the
+ * entry to the dirty entries slist, and remove it from the
+ * cleaned list (if it is present there).
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_dirtied_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ hbool_t size_changed,
+ size_t new_size)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ size_t entry_size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_dirtied_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+ HDassert( entry_ptr->is_dirty == FALSE );
+
+ if ( size_changed ) {
+
+ entry_size = new_size;
+
+ } else {
+
+ entry_size = entry_ptr->size;
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate dirty slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ }
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) {
+
+ /* the entry is dirty. If it exists on the cleaned entries list,
+ * remove it.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from clean entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+ }
+ } else {
+
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_dirtied_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_flushed_entry()
+ *
+ * Purpose: Update the clean entry slist for the flush of an entry --
+ * specifically, if the entry has been cleared, remove it
+ * from both the cleaned and dirtied lists if it is present.
+ * Otherwise, if the entry was dirty, insert the indicated
+ * entry address in the clean slist if it isn't there already.
+ *
+ * This function is only used in PHDF5, and should only
+ * be called for the process with mpi rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+#if 0 /* This is useful debugging code. -- JRM */
+static herr_t
+H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL)
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) {
+
+ HDfprintf(stdout,
+ "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n",
+ (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id);
+ }
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_flushed_entry_dummy() */
+#endif /* JRM */
+
+static herr_t
+H5AC_log_flushed_entry(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ UNUSED int type_id)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t cleared;
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+
+ FUNC_ENTER_NOAPI(H5AC_log_flushed_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ cleared = ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0 );
+
+ if ( cleared ) {
+
+ /* If the entry has been cleared, must remove it from both the
+ * cleaned list and the dirtied list.
+ */
+
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from clean entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
+ }
+ } else if ( was_dirty ) {
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the clean entry list. */
+
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate clean slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into clean entry slist.")
+ }
+
+ aux_ptr->c_slist_len += 1;
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_flushed_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_inserted_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly inserted entry.
+ *
+ * If mpi_rank isn't 0, this simply means adding the size
+ * of the entry to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must also add the entry to the
+ * dirty entries slist.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_inserted_entry(H5F_t * f,
+ H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ const H5AC_class_t * type,
+ haddr_t addr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ size_t size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+ HDassert( entry_ptr->type == type );
+
+ /* the size field of the entry will not have been set yet, so we
+ * have to obtain it directly.
+ */
+ if ( (type->size)(f, (void *)entry_ptr, &size) < 0 ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \
+ "Can't get size of entry to be inserted.")
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate dirty slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+
+ } else {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Inserted entry already in dirty slist.")
+ }
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Inserted entry in clean slist.")
+ }
+ }
+
+ aux_ptr->dirty_bytes += size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->insert_dirty_bytes += size;
+ aux_ptr->insert_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_inserted_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_renamed_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a renamed entry.
+ *
+ * WARNING
+ *
+ * At present, the way that the rename call is used ensures
+ * that the renamed entry is present in all caches by
+ * renaming in a collective operation and immediately after
+ * unprotecting the target entry.
+ *
+ * This function uses this invariant, and will cause arcane
+ * failures if it is not met. If maintaining this invariant
+ * becomes impossible, we will have to rework this function
+ * extensively, and likely include a bit of IPC for
+ * synchronization. A better option might be to subsume
+ * rename in the unprotect operation.
+ *
+ * Given that the target entry is in all caches, the function
+ * proceeds as follows:
+ *
+ * For processes with mpi rank other than 0, it simply checks to
+ * see if the entry was dirty prior to the rename, and adds
+ * the entry's size to the dirty bytes count.
+ *
+ * In the process with mpi rank 0, the function first checks
+ * to see if the entry was dirty prior to the rename. If it
+ * was, and if the entry doesn't appear in the dirtied list
+ * under its old address, it adds the entry's size to the
+ * dirty bytes count.
+ *
+ * The rank 0 process then removes any references to the
+ * entry under its old address from the cleaned and dirtied
+ * lists, and inserts an entry in the dirtied list under the
+ * new address.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_renamed_entry(H5AC_t * cache_ptr,
+ haddr_t old_addr,
+ haddr_t new_addr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t entry_in_cache;
+ hbool_t entry_dirty;
+ size_t entry_size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_renamed_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ /* get entry status, size, etc here */
+ if ( H5C_get_entry_status(cache_ptr, old_addr, &entry_size, &entry_in_cache,
+ &entry_dirty, NULL) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.")
+
+ } else if ( ! entry_in_cache ) {
+
+ HDassert( entry_in_cache );
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.")
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ /* if the entry appears in the cleaned entry slist, under its old
+ * address, remove it.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&old_addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == old_addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+
+ /* if the entry appears in the dirtied entry slist under its old
+ * address, remove it, but don't free it. Set addr to new_addr.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&old_addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == old_addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->addr = new_addr;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
} else {
+
+ /* otherwise, allocate a new entry that is ready
+ * for insertion, and increment dirty_bytes.
+ *
+ * Note that the fact that the entry wasn't in the dirtied
+ * list under its old address implies that it must have
+ * been clean to start with.
+ */
+
+ HDassert( !entry_dirty );
+
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+                        "Can't allocate dirty slist entry.")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = new_addr;
- HDassert(xfer_mode == H5FD_MPIO_COLLECTIVE );
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->rename_dirty_bytes += entry_size;
+ aux_ptr->rename_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
}
+
+ /* verify that there is no entry at new_addr in the dirty slist */
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&new_addr)) != NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "dirty slist already contains entry at new_addr.")
+ }
+
+ /* insert / reinsert the entry in the dirty slist */
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+
+ } else if ( ! entry_dirty ) {
+
+ aux_ptr->dirty_bytes += entry_size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->rename_dirty_bytes += entry_size;
+ aux_ptr->rename_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
}
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_renamed_entry() */
#endif /* H5_HAVE_PARALLEL */
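
The rank-dependent bookkeeping described above reduces to one question: does this rename add the entry's size to dirty_bytes? A minimal standalone sketch of that decision follows; the function name and parameters are illustrative only and are not part of the HDF5 API.

    /* Sketch: return nonzero if a rename should add entry_size to the
     * dirty_bytes count.  On rank 0, an entry already present in the
     * dirtied list has been counted; on other ranks, an entry that was
     * already dirty has been counted.
     */
    static int
    rename_adds_dirty_bytes(int mpi_rank, int entry_was_dirty,
                            int in_dirty_list_under_old_addr)
    {
        if (mpi_rank == 0)
            return !in_dirty_list_under_old_addr;
        else
            return !entry_was_dirty;
    }

    /* caller side, mirroring H5AC_log_renamed_entry():
     *     if (rename_adds_dirty_bytes(rank, dirty, listed))
     *         aux_ptr->dirty_bytes += entry_size;
     */
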
- *write_permitted_ptr = write_permitted;
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_propagate_flushed_and_still_clean_entries_list
+ *
+ * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed
+ * to write to file. All other metadata caches on processes
+ * with rank greater than 0 must retain dirty entries until
+ *		they are notified that those entries are clean.
+ *
+ *		This function is the main routine for that procedure.
+ *		It must be called simultaneously on all processes that
+ * have the relevant file open. To this end, there must
+ * be a barrier immediately prior to this call.
+ *
+ *		Typically, this will be done in one of two ways:
+ *
+ * 1) Dirty byte creation exceeds some user specified value.
+ *
+ * While metadata reads may occur independently, all
+ * operations writing metadata must be collective. Thus
+ * all metadata caches see the same sequence of operations,
+ * and therefore the same dirty data creation.
+ *
+ * This fact is used to synchronize the caches for purposes
+ * of propagating the list of flushed and still clean
+ * entries, by simply calling this function from all
+ * caches whenever some user specified threshold on dirty
+ * data is exceeded.
+ *
+ * 2) Under direct user control -- this operation must be
+ * collective.
+ *
+ * The operations to be managed by this function are as
+ * follows:
+ *
+ * For the process with mpi rank 0:
+ *
+ * 1) Enable writes, flush the cache to its min clean size,
+ * and then disable writes again.
+ *
+ * 2) Load the contents of the flushed and still clean entries
+ * list (c_slist_ptr) into a buffer, and broadcast that
+ * buffer to all the other caches.
+ *
+ * 3) Clear the flushed and still clean entries list
+ * (c_slist_ptr).
+ *
+ *
+ * For all processes with mpi rank greater than 0:
+ *
+ * 1) Receive the flushed and still clean entries list broadcast
+ *
+ * 2) Mark the specified entries as clean.
+ *
+ *
+ * For all processes:
+ *
+ * 1) Reset the dirtied bytes count to 0.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * July 5, 2005
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr,
+ hbool_t do_barrier)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ herr_t result;
+ int mpi_code;
+ H5AC_aux_t * aux_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ HDfprintf(stdout,
+ "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
+ (int)(aux_ptr->mpi_rank),
+ (int)(aux_ptr->dirty_bytes_propagations),
+ (int)(aux_ptr->unprotect_dirty_bytes),
+ (int)(aux_ptr->unprotect_dirty_bytes_updates),
+ (int)(aux_ptr->insert_dirty_bytes),
+ (int)(aux_ptr->insert_dirty_bytes_updates),
+ (int)(aux_ptr->rename_dirty_bytes),
+ (int)(aux_ptr->rename_dirty_bytes_updates));
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ if ( do_barrier ) {
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we start the flush. This synchronization may
+ * already be done -- hence the do_barrier parameter.
+ */
+
+ if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+ }
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ aux_ptr->write_permitted = TRUE;
+
+ result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id,
+ cache_ptr);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5C_flush_to_min_clean() failed.")
+ }
+
+ if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't broadcast clean slist.")
+ }
+
+ HDassert( aux_ptr->c_slist_len == 0 );
+
+ } else {
+
+ if ( H5AC_receive_and_apply_clean_list(f, dxpl_id,
+ H5AC_noblock_dxpl_id,
+ cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't receive and/or process clean slist broadcast.")
+ }
+ }
+
+ aux_ptr->dirty_bytes = 0;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations += 1;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
done:
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC_check_if_write_permitted() */
+} /* H5AC_propagate_flushed_and_still_clean_entries_list() */
+#endif /* H5_HAVE_PARALLEL */
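
The header comment above describes the protocol in prose. The following standalone MPI program is a toy model of the same message shape (barrier, count broadcast, then the payload broadcast as raw bytes); the addresses are made up and the program is only a sketch, not the HDF5 implementation.

    #include <mpi.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(int argc, char **argv)
    {
        int rank, num_entries = 0, i;
        long long *addrs = NULL;    /* stand-in for the MPI_Offset buffer */

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* every process must reach this point before rank 0 flushes,
         * to avoid "messages from the future".
         */
        MPI_Barrier(MPI_COMM_WORLD);

        if (rank == 0) {
            /* rank 0 would flush to its min clean size here and collect
             * the addresses of the entries that are now clean.
             */
            num_entries = 3;
            addrs = malloc((size_t)num_entries * sizeof(*addrs));
            addrs[0] = 512; addrs[1] = 1024; addrs[2] = 4096;
        }

        /* stage 1: broadcast the count so receivers can size buffers */
        MPI_Bcast(&num_entries, 1, MPI_INT, 0, MPI_COMM_WORLD);

        if (num_entries > 0) {
            if (rank != 0)
                addrs = malloc((size_t)num_entries * sizeof(*addrs));

            /* stage 2: broadcast the payload as raw bytes */
            MPI_Bcast(addrs, (int)(num_entries * sizeof(*addrs)), MPI_BYTE,
                      0, MPI_COMM_WORLD);

            /* a real cache on rank != 0 would now mark these clean */
            if (rank != 0)
                for (i = 0; i < num_entries; i++)
                    printf("rank %d: entry at %lld is clean\n",
                           rank, addrs[i]);
        }

        free(addrs);
        MPI_Finalize();
        return 0;
    }
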
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_receive_and_apply_clean_list()
+ *
+ * Purpose: Receive the list of cleaned entries from process 0,
+ * and mark the specified entries as clean.
+ *
+ *		This function must only be called by processes with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/4/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_receive_and_apply_clean_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+ haddr_t * haddr_buf_ptr = NULL;
+ MPI_Offset * MPI_Offset_buf_ptr = NULL;
+ size_t buf_size;
+ int i = 0;
+ int mpi_result;
+ int num_entries;
+
+ FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank != 0 );
+
+ /* First receive the number of entries in the list so that we
+ * can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
+ }
+
+ if ( num_entries > 0 )
+ {
+        /* allocate buffers to store the list of entry base addresses in */
+
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size);
+
+ if ( MPI_Offset_buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for receive buffer")
+ }
+
+ haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) *
+ (size_t)num_entries);
+
+ if ( haddr_buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for haddr buffer")
+ }
+
+
+ /* Now receive the list of cleaned entries
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+         * due to the lack of a predefined MPI datatype for MPI_Offset.
+         * Thus the element type is MPI_BYTE, with count equal to the
+         * buf_size computed above.
+ */
+
+ mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size,
+ MPI_BYTE, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
+ }
+
+
+ /* translate the MPI_Offsets to haddr_t */
+ i = 0;
+ while ( i < num_entries )
+ {
+ haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]);
+
+ if ( haddr_buf_ptr[i] == HADDR_UNDEF ) {
+
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
+ "can't convert MPI off to haddr")
+ }
+
+ i++;
+ }
+
+
+ /* mark the indicated entries as clean */
+ if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id,
+ cache_ptr, (int32_t)num_entries,
+ &(haddr_buf_ptr[0])) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't mark entries clean.")
+
+ }
+ }
+
+done:
+
+ if ( MPI_Offset_buf_ptr != NULL ) {
+
+ MPI_Offset_buf_ptr =
+ (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
+ }
+
+ if ( haddr_buf_ptr != NULL ) {
+
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_receive_and_apply_clean_list() */
+#endif /* H5_HAVE_PARALLEL */
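
As an aside, the MPI_BYTE workaround above reflects the MPI libraries of the day; a predefined MPI_OFFSET datatype only arrived later (MPI-2.2). On a library that provides it, the payload broadcast could be written against the element type directly, roughly as in the sketch below (an alternative sketch, not what this file does):

    /* broadcast num_entries MPI_Offset values using the predefined
     * MPI_OFFSET datatype instead of a raw byte count.
     */
    mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, num_entries,
                           MPI_OFFSET, 0, aux_ptr->mpi_comm);
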
+
diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h
index 450907f..3cbe62e 100644
--- a/src/H5ACprivate.h
+++ b/src/H5ACprivate.h
@@ -182,7 +182,7 @@ extern hid_t H5AC_ind_dxpl_id;
/* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, \
/* hbool_t rpt_fcn_enabled = */ FALSE, \
/* hbool_t set_initial_size = */ TRUE, \
- /* size_t initial_size = */ (1 * 1024 * 1024), \
+ /* size_t initial_size = */ ( 1 * 1024 * 1024), \
/* double min_clean_fraction = */ 0.25, \
/* size_t max_size = */ (16 * 1024 * 1024), \
/* size_t min_size = */ ( 1 * 1024 * 1024), \
@@ -216,6 +216,7 @@ extern hid_t H5AC_ind_dxpl_id;
#define H5AC__SET_FLUSH_MARKER_FLAG H5C__SET_FLUSH_MARKER_FLAG
#define H5AC__DELETED_FLAG H5C__DELETED_FLAG
#define H5AC__DIRTIED_FLAG H5C__DIRTIED_FLAG
+#define H5AC__SIZE_CHANGED_FLAG H5C__SIZE_CHANGED_FLAG
#define H5AC__FLUSH_INVALIDATE_FLAG H5C__FLUSH_INVALIDATE_FLAG
#define H5AC__FLUSH_CLEAR_ONLY_FLAG H5C__FLUSH_CLEAR_ONLY_FLAG
#define H5AC__FLUSH_MARKED_ENTRIES_FLAG H5C__FLUSH_MARKED_ENTRIES_FLAG
@@ -235,6 +236,7 @@ H5_DLL herr_t H5AC_unprotect(H5F_t *f, hid_t dxpl_id,
H5_DLL herr_t H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags);
H5_DLL herr_t H5AC_rename(H5F_t *f, const H5AC_class_t *type,
haddr_t old_addr, haddr_t new_addr);
+
H5_DLL herr_t H5AC_dest(H5F_t *f, hid_t dxpl_id);
H5_DLL herr_t H5AC_stats(const H5F_t *f);
diff --git a/src/H5C.c b/src/H5C.c
index afad50b..8ac73e7 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -200,6 +200,12 @@
* caused compiler warnings.
* JRM - 1/10/05
*
+ * - Added the H5C__DLL_UPDATE_FOR_SIZE_CHANGE macro and the associated
+ *   sanity checking macros.  These macros are used to update the size of
+ * a DLL when one of its entries changes size.
+ *
+ * JRM - 9/8/05
+ *
****************************************************************************/
#if H5C_DO_SANITY_CHECKS
@@ -267,11 +273,29 @@ if ( ( (entry_ptr) == NULL ) || \
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \
}
+#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \
+if ( ( (dll_len) <= 0 ) || \
+ ( (dll_size) <= 0 ) || \
+ ( (old_size) <= 0 ) || \
+ ( (old_size) > (dll_size) ) || \
+ ( (new_size) <= 0 ) || \
+ ( ( (dll_len) == 1 ) && ( (old_size) != (dll_size) ) ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL pre size update SC failed") \
+}
+
+#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \
+if ( ( (new_size) > (dll_size) ) || \
+ ( ( (dll_len) == 1 ) && ( (new_size) != (dll_size) ) ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL post size update SC failed") \
+}
+
#else /* H5C_DO_SANITY_CHECKS */
#define H5C__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)
#define H5C__DLL_SC(head_ptr, tail_ptr, len, Size, fv)
#define H5C__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)
+#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size)
+#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size)
#endif /* H5C_DO_SANITY_CHECKS */
@@ -344,6 +368,11 @@ if ( ( (entry_ptr) == NULL ) || \
(Size) -= entry_ptr->size; \
}
+#define H5C__DLL_UPDATE_FOR_SIZE_CHANGE(dll_len, dll_size, old_size, new_size) \
+ H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \
+ (dll_size) -= (old_size); \
+ (dll_size) += (new_size); \
+ H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size)
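
The same subtract-old/add-new pattern, bracketed by pre and post checks, reappears below for the hash table index and the skip list. For illustration, here is a self-contained version with an invented struct and assert() standing in for the HGOTO_ERROR-based sanity check macros:

    #include <assert.h>
    #include <stddef.h>

    /* toy aggregate: a count of entries plus their summed size */
    struct agg {
        int    len;
        size_t size;
    };

    /* mirror of H5C__DLL_UPDATE_FOR_SIZE_CHANGE: adjust the aggregate
     * when a single entry changes from old_size to new_size.
     */
    static void
    agg_update_for_size_change(struct agg *a, size_t old_size, size_t new_size)
    {
        /* pre: the resized entry must already be counted in the total */
        assert(a->len > 0 && a->size > 0);
        assert(old_size > 0 && new_size > 0);
        assert(old_size <= a->size);
        assert(a->len > 1 || a->size == old_size);

        a->size -= old_size;
        a->size += new_size;

        /* post: the new entry size must fit within the updated total */
        assert(new_size <= a->size);
        assert(a->len > 1 || a->size == new_size);
    }
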
#if H5C_DO_SANITY_CHECKS
@@ -534,6 +563,19 @@ if ( ( (entry_ptr) == NULL ) || \
#define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr) \
(((cache_ptr)->renames)[(entry_ptr)->type->id])++;
+#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size)\
+ if ( (entry_ptr)->size < (new_size) ) { \
+ ((cache_ptr)->size_increases[(entry_ptr)->type->id])++; \
+ if ( (cache_ptr)->index_size > (cache_ptr)->max_index_size ) \
+ (cache_ptr)->max_index_size = (cache_ptr)->index_size; \
+ if ( (cache_ptr)->slist_size > (cache_ptr)->max_slist_size ) \
+ (cache_ptr)->max_slist_size = (cache_ptr)->slist_size; \
+ if ( (cache_ptr)->pl_size > (cache_ptr)->max_pl_size ) \
+ (cache_ptr)->max_pl_size = (cache_ptr)->pl_size; \
+ } else { \
+ ((cache_ptr)->size_decreases[(entry_ptr)->type->id])++; \
+ }
+
#define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr) \
(cache_ptr)->total_ht_insertions++;
@@ -646,6 +688,7 @@ if ( ( (entry_ptr) == NULL ) || \
#define H5C__RESET_CACHE_ENTRY_STATS(entry_ptr)
#define H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr)
#define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr)
+#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size)
#define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr)
#define H5C__UPDATE_STATS_FOR_HT_DELETION(cache_ptr)
#define H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, success, depth)
@@ -748,9 +791,32 @@ if ( ( (cache_ptr) == NULL ) || \
( ((cache_ptr)->index)[k] != (entry_ptr) ) || \
( (entry_ptr)->ht_prev != NULL ) ) { \
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \
- "Post HT shift to front SC failed") \
+ "Post HT shift to front SC failed") \
+}
+
+#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \
+if ( ( (cache_ptr) == NULL ) || \
+ ( (cache_ptr)->index_len <= 0 ) || \
+ ( (cache_ptr)->index_size <= 0 ) || \
+     ( (old_size) <= 0 ) || \
+ ( (old_size) > (cache_ptr)->index_size ) || \
+ ( (new_size) <= 0 ) || \
+ ( ( (cache_ptr)->index_len == 1 ) && \
+ ( (cache_ptr)->index_size != (old_size) ) ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Pre HT entry size change SC failed") \
}
+#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \
+if ( ( (cache_ptr) == NULL ) || \
+ ( (cache_ptr)->index_len <= 0 ) || \
+ ( (cache_ptr)->index_size <= 0 ) || \
+ ( (new_size) > (cache_ptr)->index_size ) || \
+ ( ( (cache_ptr)->index_len == 1 ) && \
+ ( (cache_ptr)->index_size != (new_size) ) ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Post HT entry size change SC failed") \
+}
#else /* H5C_DO_SANITY_CHECKS */
@@ -759,6 +825,8 @@ if ( ( (cache_ptr) == NULL ) || \
#define H5C__PRE_HT_SEARCH_SC(cache_ptr, Addr, fail_val)
#define H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, Addr, k, fail_val)
#define H5C__POST_HT_SHIFT_TO_FRONT(cache_ptr, entry_ptr, k, fail_val)
+#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size)
+#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size)
#endif /* H5C_DO_SANITY_CHECKS */
@@ -840,6 +908,14 @@ if ( ( (cache_ptr) == NULL ) || \
H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, (entry_ptr != NULL), depth) \
}
+#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \
+{ \
+ H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \
+ (cache_ptr)->index_size -= old_size; \
+ (cache_ptr)->index_size += new_size; \
+ H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \
+}
+
/**************************************************************************
*
@@ -896,7 +972,7 @@ if ( ( (cache_ptr) == NULL ) || \
HDassert( H5F_addr_defined((entry_ptr)->addr) ); \
HDassert( !((entry_ptr)->in_slist) ); \
\
- if ( H5SL_insert((cache_ptr)->slist_ptr, &(entry_ptr)->addr, entry_ptr) \
+ if ( H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) \
< 0 ) \
HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, \
"Can't insert entry in skip list") \
@@ -963,6 +1039,44 @@ if ( ( (cache_ptr) == NULL ) || \
} /* H5C__REMOVE_ENTRY_FROM_SLIST */
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C__UPDATE_SLIST_FOR_SIZE_CHANGE
+ *
+ * Purpose: Update cache_ptr->slist_size for a change in the size of
+ *		an entry in the slist.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 9/07/05
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \
+{ \
+ HDassert( (cache_ptr) ); \
+ HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \
+ HDassert( (old_size) > 0 ); \
+ HDassert( (new_size) > 0 ); \
+ HDassert( (old_size) <= (cache_ptr)->slist_size ); \
+ HDassert( (cache_ptr)->slist_len > 0 ); \
+ HDassert( ((cache_ptr)->slist_len > 1) || \
+ ( (cache_ptr)->slist_size == (old_size) ) ); \
+ \
+ (cache_ptr)->slist_size -= (old_size); \
+ (cache_ptr)->slist_size += (new_size); \
+ \
+ HDassert( (new_size) <= (cache_ptr)->slist_size ); \
+ HDassert( ( (cache_ptr)->slist_len > 1 ) || \
+ ( (cache_ptr)->slist_size == (new_size) ) ); \
+} /* H5C__UPDATE_SLIST_FOR_SIZE_CHANGE */
+
+
/**************************************************************************
*
* Replacement policy update macros:
@@ -1732,6 +1846,11 @@ static herr_t H5C_make_space_in_cache(H5F_t * f,
size_t space_needed,
hbool_t write_permitted,
hbool_t * first_flush_ptr);
+#if H5C_DO_EXTREME_SANITY_CHECKS
+static herr_t H5C_validate_lru_list(H5C_t * cache_ptr);
+static herr_t H5C_verify_not_in_index(H5C_t * cache_ptr,
+ H5C_cache_entry_t * entry_ptr);
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
/****************************************************************************
@@ -1874,8 +1993,8 @@ done:
*
* The check_write_permitted parameter must either be NULL,
* or point to a function of type H5C_write_permitted_func_t.
- * If it is NULL, the cache will presume that writes are
- * always permitted.
+ * If it is NULL, the cache will use the write_permitted
+ * flag to determine whether writes are permitted.
*
* Return: Success: Pointer to the new instance.
*
@@ -1901,6 +2020,16 @@ done:
* Added/updated initialization for the automatic cache
* size control data structures.
*
+ * JRM -- 6/24/05
+ * Added support for the new write_permitted field of
+ * the H5C_t structure.
+ *
+ * JRM -- 7/5/05
+ * Added the new log_flush parameter and supporting code.
+ *
+ * JRM -- 9/21/05
+ * Added the new aux_ptr parameter and supporting code.
+ *
*-------------------------------------------------------------------------
*/
@@ -1909,7 +2038,10 @@ H5C_create(size_t max_cache_size,
size_t min_clean_size,
int max_type_id,
const char * (* type_name_table_ptr),
- H5C_write_permitted_func_t check_write_permitted)
+ H5C_write_permitted_func_t check_write_permitted,
+ hbool_t write_permitted,
+ H5C_log_flush_func_t log_flush,
+ void * aux_ptr)
{
int i;
H5C_t * cache_ptr = NULL;
@@ -1925,6 +2057,8 @@ H5C_create(size_t max_cache_size,
HDassert( max_type_id < H5C__MAX_NUM_TYPE_IDS );
HDassert( type_name_table_ptr );
+ HDassert( ( write_permitted == TRUE ) || ( write_permitted == FALSE ) );
+
for ( i = 0; i <= max_type_id; i++ ) {
HDassert( (type_name_table_ptr)[i] );
@@ -1950,6 +2084,8 @@ H5C_create(size_t max_cache_size,
cache_ptr->magic = H5C__H5C_T_MAGIC;
+ cache_ptr->aux_ptr = aux_ptr;
+
cache_ptr->max_type_id = max_type_id;
cache_ptr->type_name_table_ptr = type_name_table_ptr;
@@ -1957,6 +2093,9 @@ H5C_create(size_t max_cache_size,
cache_ptr->min_clean_size = min_clean_size;
cache_ptr->check_write_permitted = check_write_permitted;
+ cache_ptr->write_permitted = write_permitted;
+
+ cache_ptr->log_flush = log_flush;
cache_ptr->index_len = 0;
cache_ptr->index_size = (size_t)0;
@@ -2426,11 +2565,13 @@ H5C_flush_cache(H5F_t * f,
hbool_t first_flush = TRUE;
int32_t protected_entries = 0;
int32_t i;
- H5SL_node_t * node_ptr;
- H5C_cache_entry_t * entry_ptr;
+ H5SL_node_t * node_ptr = NULL;
+ H5C_cache_entry_t * entry_ptr = NULL;
#if H5C_DO_SANITY_CHECKS
int32_t actual_slist_len = 0;
+ int32_t initial_slist_len = 0;
size_t actual_slist_size = 0;
+ size_t initial_slist_size = 0;
#endif /* H5C_DO_SANITY_CHECKS */
FUNC_ENTER_NOAPI(H5C_flush_cache, FAIL)
@@ -2468,12 +2609,28 @@ H5C_flush_cache(H5F_t * f,
} else {
node_ptr = H5SL_first(cache_ptr->slist_ptr);
+
+#if H5C_DO_SANITY_CHECKS
+ /* H5C_flush_single_entry() now removes dirty entries from the
+ * slist as it flushes them. Thus for sanity checks we must
+ * make note of the initial slist length and size before we
+ * do any flushes.
+ */
+ initial_slist_len = cache_ptr->slist_len;
+ initial_slist_size = cache_ptr->slist_size;
+#endif /* H5C_DO_SANITY_CHECKS */
+
}
while ( node_ptr != NULL )
{
entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr);
+ /* increment node pointer now, before we delete its target
+ * from the slist.
+ */
+ node_ptr = H5SL_next(node_ptr);
+
HDassert( entry_ptr != NULL );
HDassert( entry_ptr->in_slist );
@@ -2512,14 +2669,22 @@ H5C_flush_cache(H5F_t * f,
}
}
}
-
- node_ptr = H5SL_next(node_ptr);
-
} /* while */
#if H5C_DO_SANITY_CHECKS
- HDassert( actual_slist_len == cache_ptr->slist_len );
- HDassert( actual_slist_size == cache_ptr->slist_size );
+ HDassert( actual_slist_len == initial_slist_len );
+ HDassert( actual_slist_size == initial_slist_size );
+
+ if ( (flags & H5C__FLUSH_INVALIDATE_FLAG) != 0 ) {
+
+ HDassert( cache_ptr->slist_len == initial_slist_len );
+ HDassert( cache_ptr->slist_size == initial_slist_size );
+
+ } else if ( ! flush_marked_entries ) {
+
+ HDassert( cache_ptr->slist_len == 0 );
+ HDassert( cache_ptr->slist_size == 0 );
+ }
#endif /* H5C_DO_SANITY_CHECKS */
if ( destroy ) {
@@ -2625,6 +2790,95 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5C_flush_to_min_clean
+ *
+ * Purpose:	Flush dirty entries until the cache's min clean size is
+ * attained.
+ *
+ * This function is used in the implementation of the
+ * metadata cache in PHDF5. To avoid "messages from the
+ * future", the cache on process 0 can't be allowed to
+ * flush entries until the other processes have reached
+ * the same point in the calculation. If this constraint
+ * is not met, it is possible that the other processes will
+ * read metadata generated at a future point in the
+ * computation.
+ *
+ *
+ * Return: Non-negative on success/Negative on failure or if
+ * write is not permitted.
+ *
+ * Programmer: John Mainzer
+ * 9/16/05
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_flush_to_min_clean(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr)
+{
+ herr_t result;
+ herr_t ret_value = SUCCEED;
+ hbool_t first_flush = TRUE;
+ hbool_t write_permitted;
+
+ FUNC_ENTER_NOAPI(H5C_flush_to_min_clean, FAIL)
+
+ HDassert( cache_ptr );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( cache_ptr->skip_file_checks || f );
+
+ if ( cache_ptr->check_write_permitted != NULL ) {
+
+ result = (cache_ptr->check_write_permitted)(f,
+ primary_dxpl_id,
+ &write_permitted);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't get write_permitted")
+ }
+ } else {
+
+ write_permitted = cache_ptr->write_permitted;
+ }
+
+ if ( ! write_permitted ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "cache write is not permitted!?!\n");
+ }
+
+
+ result = H5C_make_space_in_cache(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ cache_ptr,
+ 0,
+ write_permitted,
+ &first_flush);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5C_make_space_in_cache failed.")
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_flush_to_min_clean() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5C_get_cache_auto_resize_config
*
* Purpose: Copy the current configuration of the cache automatic
@@ -2798,6 +3052,93 @@ done:
/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_get_entry_status
+ *
+ * Purpose: This function is used to determine whether the cache
+ * contains an entry with the specified base address. If
+ * the entry exists, it also reports some status information
+ * on the entry.
+ *
+ * Status information is reported in the locations pointed
+ * to by the size_ptr, in_cache_ptr, is_dirty_ptr, and
+ * is_protected_ptr. While in_cache_ptr must be defined,
+ * the remaining pointers may be NULL, in which case the
+ * associated data is not reported.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 7/1/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+herr_t
+H5C_get_entry_status(H5C_t * cache_ptr,
+ haddr_t addr,
+ size_t * size_ptr,
+ hbool_t * in_cache_ptr,
+ hbool_t * is_dirty_ptr,
+ hbool_t * is_protected_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5C_cache_entry_t * entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5C_get_entry_status, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( H5F_addr_defined(addr) );
+ HDassert( in_cache_ptr != NULL );
+
+    /* this test duplicates two of the above asserts, but we need an
+ * invocation of HGOTO_ERROR to keep the compiler happy.
+ */
+ if ( ( cache_ptr == NULL ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr on entry.")
+ }
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+
+ if ( entry_ptr == NULL ) {
+
+ /* the entry doesn't exist in the cache -- report this
+ * and quit.
+ */
+ *in_cache_ptr = FALSE;
+
+ } else {
+
+ *in_cache_ptr = TRUE;
+
+ if ( size_ptr != NULL ) {
+
+ *size_ptr = entry_ptr->size;
+ }
+
+ if ( is_dirty_ptr != NULL ) {
+
+ *is_dirty_ptr = entry_ptr->is_dirty;
+ }
+
+ if ( is_protected_ptr != NULL ) {
+
+ *is_protected_ptr = entry_ptr->is_protected;
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_get_entry_status() */
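
Since every output pointer other than in_cache_ptr may be NULL, a caller asks only for the fields it needs. A hedged usage sketch (cache_ptr and addr are assumed to exist; error handling is elided):

    hbool_t in_cache = FALSE;
    hbool_t is_dirty = FALSE;

    /* size and protection status are not needed, so pass NULL for them */
    if (H5C_get_entry_status(cache_ptr, addr, NULL, &in_cache,
                             &is_dirty, NULL) < 0) {
        /* report the error */
    } else if (in_cache && is_dirty) {
        /* the entry is resident and still dirty */
    }
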
+
+
+/*-------------------------------------------------------------------------
* Function: H5C_insert_entry
*
* Purpose: Adds the specified thing to the cache. The thing need not
@@ -2845,6 +3186,10 @@ done:
* This is part of a set of changes moving management of the
* is_dirty field of H5C_cache_entry_t into the H5C code.
*
+ * JRM -- 6/24/05
+ * Added support for the new write_permitted field of
+ * the H5C_t structure.
+ *
*-------------------------------------------------------------------------
*/
@@ -2877,6 +3222,21 @@ H5C_insert_entry(H5F_t * f,
HDassert( H5F_addr_defined(addr) );
HDassert( thing );
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_verify_not_in_index(cache_ptr, (H5C_cache_entry_t *)thing) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "thing already in index.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 );
entry_ptr = (H5C_cache_entry_t *)thing;
@@ -2897,6 +3257,10 @@ H5C_insert_entry(H5F_t * f,
entry_ptr->in_slist = FALSE;
+#ifdef H5_HAVE_PARALLEL
+ entry_ptr->clear_on_unprotect = FALSE;
+#endif /* H5_HAVE_PARALLEL */
+
entry_ptr->ht_next = NULL;
entry_ptr->ht_prev = NULL;
@@ -2925,6 +3289,9 @@ H5C_insert_entry(H5F_t * f,
HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \
"Can't get write_permitted")
}
+ } else {
+
+ write_permitted = cache_ptr->write_permitted;
}
HDassert( entry_ptr->size <= H5C_MAX_ENTRY_SIZE );
@@ -3022,10 +3389,26 @@ H5C_insert_entry(H5F_t * f,
H5C__UPDATE_RP_FOR_INSERTION(cache_ptr, entry_ptr, FAIL)
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr)
done:
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_insert_entry() */
@@ -3033,6 +3416,171 @@ done:
/*-------------------------------------------------------------------------
*
+ * Function: H5C_mark_entries_as_clean
+ *
+ * Purpose: When the H5C code is used to implement the metadata caches
+ * in PHDF5, only the cache with MPI_rank 0 is allowed to
+ * actually write entries to disk -- all other caches must
+ * retain dirty entries until they are advised that the
+ * entries are clean.
+ *
+ * This function exists to allow the H5C code to receive these
+ * notifications.
+ *
+ * The function receives a list of entry base addresses
+ * which must refer to dirty entries in the cache. If any
+ * of the entries are either clean or don't exist, the
+ * function flags an error.
+ *
+ * The function scans the list of entries and flushes all
+ * those that are currently unprotected with the
+ * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently
+ * protected are flagged for clearing when they are
+ * unprotected.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer
+ * 7/5/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5C_mark_entries_as_clean(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr,
+ int32_t ce_array_len,
+ haddr_t * ce_array_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t first_flush = TRUE;
+ int i;
+ haddr_t addr;
+#if H5C_DO_SANITY_CHECKS
+ haddr_t last_addr;
+#endif /* H5C_DO_SANITY_CHECKS */
+ H5C_cache_entry_t * entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5C_mark_entries_as_clean, FAIL)
+
+ HDassert( cache_ptr );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( cache_ptr->skip_file_checks || f );
+
+ HDassert( ce_array_len > 0 );
+ HDassert( ce_array_ptr != NULL );
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HDassert(0);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ for ( i = 0; i < ce_array_len; i++ )
+ {
+ addr = ce_array_ptr[i];
+
+#if H5C_DO_SANITY_CHECKS
+ if ( i == 0 ) {
+
+ last_addr = addr;
+
+ } else {
+
+ if ( last_addr == addr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Duplicate entry in cleaned list.\n");
+
+ } else if ( last_addr > addr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "cleaned list not sorted.\n");
+ }
+ }
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HDassert(0);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ HDassert( H5F_addr_defined(addr) );
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL)
+
+ if ( entry_ptr == NULL ) {
+#if H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout,
+ "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n",
+ (int)i,
+ (long)addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Listed entry not in cache?!?!?.")
+
+ } else if ( ! entry_ptr->is_dirty ) {
+
+#if H5C_DO_SANITY_CHECKS
+ HDfprintf(stdout,
+ "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n",
+ (long)addr);
+#endif /* H5C_DO_SANITY_CHECKS */
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Listed entry not dirty?!?!?.")
+
+ } else if ( entry_ptr->is_protected ) {
+
+ entry_ptr->clear_on_unprotect = TRUE;
+
+ } else {
+
+ if ( H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ cache_ptr,
+ entry_ptr->type,
+ addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ &first_flush,
+ TRUE) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
+ }
+ }
+ }
+
+done:
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HDassert(0);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_mark_entries_as_clean() */
+#endif /* H5_HAVE_PARALLEL */
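
The sanity checks above expect the address array to be sorted in increasing order with no duplicates, and every address must name a dirty entry in the cache. A hedged usage sketch (the file, dxpl ids, cache pointer, and addresses are illustrative placeholders):

    /* three dirty entries, addresses sorted in increasing order */
    haddr_t cleaned[3] = {512, 1024, 4096};

    if (H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id,
                                  cache_ptr, 3, cleaned) < 0) {
        /* report the error */
    }
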
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5C_rename_entry
*
* Purpose: Use this function to notify the cache that an entry's
@@ -3077,6 +3625,15 @@ H5C_rename_entry(H5C_t * cache_ptr,
HDassert( H5F_addr_defined(new_addr) );
HDassert( H5F_addr_ne(old_addr, new_addr) );
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+
H5C__SEARCH_INDEX(cache_ptr, old_addr, entry_ptr, FAIL)
if ( ( entry_ptr == NULL ) || ( entry_ptr->type != type ) )
@@ -3146,6 +3703,14 @@ H5C_rename_entry(H5C_t * cache_ptr,
done:
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_rename_entry() */
@@ -3204,6 +3769,9 @@ done:
* forces an immediate reduction in cache size. Modified
* the code to deal with this eventuallity.
*
+ * JRM -- 6/24/05
+ * Added support for the new write_permitted field of H5C_t.
+ *
*-------------------------------------------------------------------------
*/
@@ -3237,6 +3805,15 @@ H5C_protect(H5F_t * f,
HDassert( type->load );
HDassert( H5F_addr_defined(addr) );
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HDassert(0);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
/* first check to see if the target is in cache */
H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL)
@@ -3284,6 +3861,8 @@ H5C_protect(H5F_t * f,
}
} else {
+ write_permitted = cache_ptr->write_permitted;
+
have_write_permitted = TRUE;
}
@@ -3390,6 +3969,8 @@ H5C_protect(H5F_t * f,
}
} else {
+ write_permitted = cache_ptr->write_permitted;
+
have_write_permitted = TRUE;
}
}
@@ -3439,6 +4020,15 @@ H5C_protect(H5F_t * f,
done:
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HDassert(0);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_protect() */
@@ -3796,6 +4386,10 @@ done:
* JRM -- 7/21/04
* Updated function for the addition of the hash table.
*
+ * JRM -- 9/8/05
+ * Updated function for the addition of cache entry size
+ * change statistics.
+ *
*-------------------------------------------------------------------------
*/
@@ -3819,6 +4413,8 @@ H5C_stats(H5C_t * cache_ptr,
int64_t total_flushes = 0;
int64_t total_evictions = 0;
int64_t total_renames = 0;
+ int64_t total_size_increases = 0;
+ int64_t total_size_decreases = 0;
int32_t aggregate_max_accesses = 0;
int32_t aggregate_min_accesses = 1000000;
int32_t aggregate_max_clears = 0;
@@ -3845,13 +4441,15 @@ H5C_stats(H5C_t * cache_ptr,
for ( i = 0; i <= cache_ptr->max_type_id; i++ ) {
- total_hits += cache_ptr->hits[i];
- total_misses += cache_ptr->misses[i];
- total_insertions += cache_ptr->insertions[i];
- total_clears += cache_ptr->clears[i];
- total_flushes += cache_ptr->flushes[i];
- total_evictions += cache_ptr->evictions[i];
- total_renames += cache_ptr->renames[i];
+ total_hits += cache_ptr->hits[i];
+ total_misses += cache_ptr->misses[i];
+ total_insertions += cache_ptr->insertions[i];
+ total_clears += cache_ptr->clears[i];
+ total_flushes += cache_ptr->flushes[i];
+ total_evictions += cache_ptr->evictions[i];
+ total_renames += cache_ptr->renames[i];
+ total_size_increases += cache_ptr->size_increases[i];
+ total_size_decreases += cache_ptr->size_decreases[i];
#if H5C_COLLECT_CACHE_ENTRY_STATS
if ( aggregate_max_accesses < cache_ptr->max_accesses[i] )
aggregate_max_accesses = cache_ptr->max_accesses[i];
@@ -3963,6 +4561,10 @@ H5C_stats(H5C_t * cache_ptr,
(long)total_insertions,
(long)total_renames);
+ HDfprintf(stdout, " Total entry size incrs / decrs = %ld / %ld\n",
+ (long)total_size_increases,
+ (long)total_size_decreases);
+
#if H5C_COLLECT_CACHE_ENTRY_STATS
HDfprintf(stdout, " aggregate max / min accesses = %d / %d\n",
@@ -4014,6 +4616,11 @@ H5C_stats(H5C_t * cache_ptr,
(long)(cache_ptr->insertions[i]),
(long)(cache_ptr->renames[i]));
+ HDfprintf(stdout,
+ " size increases / decreases = %ld / %ld\n",
+ (long)(cache_ptr->size_increases[i]),
+ (long)(cache_ptr->size_decreases[i]));
+
#if H5C_COLLECT_CACHE_ENTRY_STATS
HDfprintf(stdout,
@@ -4061,6 +4668,9 @@ done:
* JRM - 7/21/04
* Updated for hash table related statistics.
*
+ * JRM - 9/8/05
+ * Updated for size increase / decrease statistics.
+ *
*-------------------------------------------------------------------------
*/
@@ -4084,6 +4694,8 @@ H5C_stats__reset(H5C_t * cache_ptr)
cache_ptr->flushes[i] = 0;
cache_ptr->evictions[i] = 0;
cache_ptr->renames[i] = 0;
+ cache_ptr->size_increases[i] = 0;
+ cache_ptr->size_decreases[i] = 0;
}
cache_ptr->total_ht_insertions = 0;
@@ -4176,6 +4788,27 @@ H5C_stats__reset(H5C_t * cache_ptr)
* field into the cache code. This has become necessary
* to repair a cache coherency bug in PHDF5.
*
+ * JRM -- 7/5/05
+ * Added code supporting the new clear_on_unprotect field
+ * of H5C_cache_entry_t. This change is also part of the
+ * above mentioned cache coherency bug fix in PHDF5.
+ *
+ * JRM -- 9/8/05
+ * Added the size_changed and new_size parameters and the
+ * supporting code. Since the metadata cache synchronizes
+ * on dirty bytes creation in the PHDF5 case, we must now
+ * track changes in entry size.
+ *
+ * Note that the new_size parameter is ignored unless the
+ * size_changed parameter is TRUE. In this case, the new_size
+ * must be positive.
+ *
+ * Also observe that if size_changed is TRUE, dirtied must be
+ * TRUE.
+ *
+ * JRM -- 9/23/05
+ * Moved the size_changed parameter into flags.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -4186,16 +4819,27 @@ H5C_unprotect(H5F_t * f,
const H5C_class_t * type,
haddr_t addr,
void * thing,
- unsigned int flags)
+ unsigned int flags,
+ size_t new_size)
{
hbool_t deleted;
+ hbool_t dirtied;
hbool_t set_flush_marker;
+ hbool_t size_changed;
+#ifdef H5_HAVE_PARALLEL
+ hbool_t clear_entry = FALSE;
+#endif /* H5_HAVE_PARALLEL */
herr_t ret_value = SUCCEED; /* Return value */
H5C_cache_entry_t * entry_ptr;
H5C_cache_entry_t * test_entry_ptr;
FUNC_ENTER_NOAPI(H5C_unprotect, FAIL)
+ deleted = ( (flags & H5C__DELETED_FLAG) != 0 );
+ dirtied = ( (flags & H5C__DIRTIED_FLAG) != 0 );
+ set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 );
+ size_changed = ( (flags & H5C__SIZE_CHANGED_FLAG) != 0 );
+
HDassert( cache_ptr );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
HDassert( cache_ptr->skip_file_checks || f );
@@ -4204,15 +4848,52 @@ H5C_unprotect(H5F_t * f,
HDassert( type->flush );
HDassert( H5F_addr_defined(addr) );
HDassert( thing );
-
- deleted = ( (flags & H5C__DELETED_FLAG) != 0 );
- set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 );
+ HDassert( ( size_changed == TRUE ) || ( size_changed == FALSE ) );
+ HDassert( ( ! size_changed ) || ( dirtied ) );
+ HDassert( ( ! size_changed ) || ( new_size > 0 ) );
entry_ptr = (H5C_cache_entry_t *)thing;
HDassert( entry_ptr->addr == addr );
HDassert( entry_ptr->type == type );
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+#ifdef H5_HAVE_PARALLEL
+ /* When the H5C code is used to implement the metadata cache in the
+ * PHDF5 case, only the cache on process 0 is allowed to write to file.
+ * All the other metadata caches must hold dirty entries until they
+ * are told that the entries are clean.
+ *
+ * The clear_on_unprotect flag in the H5C_cache_entry_t structure
+ * exists to deal with the case in which an entry is protected when
+ * its cache receives word that the entry is now clean. In this case,
+ * the clear_on_unprotect flag is set, and the entry is flushed with
+ * the H5C__FLUSH_CLEAR_ONLY_FLAG.
+ *
+ * All this is a bit awkward, but until the metadata cache entries
+ * are contiguous, with only one dirty flag, we have to let the supplied
+     * functions deal with resetting the is_dirty flag.
+ */
+ if ( entry_ptr->clear_on_unprotect ) {
+
+ HDassert( entry_ptr->is_dirty );
+
+ entry_ptr->clear_on_unprotect = FALSE;
+
+ if ( ! dirtied ) {
+
+ clear_entry = TRUE;
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
if ( ! (entry_ptr->is_protected) ) {
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
@@ -4220,13 +4901,40 @@ H5C_unprotect(H5F_t * f,
}
/* mark the entry as dirty if appropriate */
- entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || (flags & H5AC__DIRTIED_FLAG) );
+ entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || dirtied );
+
+ /* update for change in entry size if necessary */
+ if ( ( size_changed ) && ( entry_ptr->size != new_size ) ) {
+
+ /* update the protected list */
+ H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr->pl_len), \
+ (cache_ptr->pl_size), \
+ (entry_ptr->size), (new_size));
+
+ /* update the hash table */
+ H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\
+ (new_size));
+
+ /* if the entry is in the skip list, update that too */
+ if ( entry_ptr->in_slist ) {
+
+ H5C__UPDATE_SLIST_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\
+ (new_size));
+ }
+
+ /* update statistics just before changing the entry size */
+ H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE((cache_ptr), (entry_ptr), \
+ (new_size));
+
+ /* finally, update the entry size proper */
+ entry_ptr->size = new_size;
+ }
H5C__UPDATE_RP_FOR_UNPROTECT(cache_ptr, entry_ptr, FAIL)
entry_ptr->is_protected = FALSE;
- /* if the entry is dirty, or its flush_marker with the set flush flag,
+ /* if the entry is dirty, 'or' its flush_marker with the set flush flag,
* and then add it to the skip list if it isn't there already.
*/
@@ -4287,11 +4995,57 @@ H5C_unprotect(H5F_t * f,
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush.")
}
}
+#ifdef H5_HAVE_PARALLEL
+ else if ( clear_entry ) {
+
+ /* the following first flush flag will never be used as we are
+ * calling H5C_flush_single_entry with the H5C__FLUSH_CLEAR_ONLY_FLAG
+ * flag. However, it is needed for the function call.
+ */
+ hbool_t dummy_first_flush = TRUE;
+
+ /* verify that the target entry is in the cache. */
+
+ H5C__SEARCH_INDEX(cache_ptr, addr, test_entry_ptr, FAIL)
+
+ if ( test_entry_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "entry not in hash table?!?.")
+ }
+ else if ( test_entry_ptr != entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "hash table contains multiple entries for addr?!?.")
+ }
+
+ if ( H5C_flush_single_entry(f,
+ primary_dxpl_id,
+ secondary_dxpl_id,
+ cache_ptr,
+ type,
+ addr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG,
+ &dummy_first_flush,
+ TRUE) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr)
done:
+#if H5C_DO_EXTREME_SANITY_CHECKS
+ if ( H5C_validate_lru_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "LRU sanity check failed.\n");
+ }
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_unprotect() */
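
Putting the flag handling together: a caller that resized an entry while it was protected reports the new size at unprotect time, and the size-changed flag implies the dirtied flag. A hedged sketch of such a call (the leading file/dxpl/cache arguments follow the pattern used elsewhere in this file; entry, type, and new_size are placeholders):

    /* the protected entry changed size; report it as dirtied and hand
     * the cache its new size so the index, slist, and protected list
     * totals stay consistent.
     */
    if (H5C_unprotect(f, primary_dxpl_id, secondary_dxpl_id, cache_ptr,
                      type, addr, thing,
                      H5C__DIRTIED_FLAG | H5C__SIZE_CHANGED_FLAG,
                      new_size) < 0) {
        /* report the error */
    }
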
@@ -5656,6 +6410,18 @@ done:
* H5C__FLUSH_INVALIDATE_FLAG and H5C__FLUSH_CLEAR_ONLY_FLAG
* respectively.
*
+ * JRM -- 6/24/05
+ * Added code to remove dirty entries from the slist after
+ * they have been flushed. Also added a sanity check that
+ * will scream if we attempt a write when writes are
+ * completely disabled.
+ *
+ * JRM -- 7/5/05
+ * Added code to call the new log_flush callback whenever
+ * a dirty entry is written to disk. Note that the callback
+ * is not called if the H5C__FLUSH_CLEAR_ONLY_FLAG is set,
+ * as there is no write to file in this case.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -5671,8 +6437,10 @@ H5C_flush_single_entry(H5F_t * f,
{
hbool_t destroy;
hbool_t clear_only;
+ hbool_t was_dirty;
herr_t ret_value = SUCCEED; /* Return value */
herr_t status;
+ int type_id;
H5C_cache_entry_t * entry_ptr = NULL;
FUNC_ENTER_NOAPI_NOINIT(H5C_flush_single_entry)
@@ -5779,6 +6547,9 @@ H5C_flush_single_entry(H5F_t * f,
#endif /* NDEBUG */
#endif /* H5_HAVE_PARALLEL */
+ was_dirty = entry_ptr->is_dirty;
+ type_id = entry_ptr->type->id;
+
entry_ptr->flush_marker = FALSE;
if ( clear_only ) {
@@ -5928,6 +6699,16 @@ H5C_flush_single_entry(H5F_t * f,
}
} else {
+#if H5C_DO_SANITY_CHECKS
+ if ( ( entry_ptr->is_dirty ) &&
+ ( cache_ptr->check_write_permitted == NULL ) &&
+ ( ! (cache_ptr->write_permitted) ) ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Write when writes are always forbidden!?!?!")
+ }
+#endif /* H5C_DO_SANITY_CHECKS */
+
/* Only block for all the processes on the first piece of metadata
*/
@@ -5951,10 +6732,28 @@ H5C_flush_single_entry(H5F_t * f,
}
}
+ if ( ( ! destroy ) && ( entry_ptr->in_slist ) ) {
+
+ H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr)
+ }
+
if ( ! destroy ) {
HDassert( !(entry_ptr->is_dirty) );
HDassert( !(entry_ptr->flush_marker) );
+ HDassert( !(entry_ptr->in_slist) );
+ }
+
+ if ( cache_ptr->log_flush ) {
+
+ status = (cache_ptr->log_flush)(cache_ptr, addr, was_dirty,
+ flags, type_id);
+
+ if ( status < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+ "log_flush callback failed.")
+ }
}
}
@@ -6024,6 +6823,10 @@ H5C_load_entry(H5F_t * f,
entry_ptr->type = type;
entry_ptr->is_protected = FALSE;
entry_ptr->in_slist = FALSE;
+ entry_ptr->flush_marker = FALSE;
+#ifdef H5_HAVE_PARALLEL
+ entry_ptr->clear_on_unprotect = FALSE;
+#endif /* H5_HAVE_PARALLEL */
if ( (type->size)(f, thing, &(entry_ptr->size)) < 0 ) {
@@ -6271,3 +7074,203 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_make_space_in_cache() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_validate_lru_list
+ *
+ * Purpose: Debugging function that scans the LRU list for errors.
+ *
+ * If an error is detected, the function generates a
+ * diagnostic and returns FAIL. If no error is detected,
+ * the function returns SUCCEED.
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: John Mainzer, 7/14/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+
+static herr_t
+H5C_validate_lru_list(H5C_t * cache_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ int32_t len = 0;
+ size_t size = 0;
+ H5C_cache_entry_t * entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5C_validate_lru_list)
+
+ HDassert( cache_ptr );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ if ( ( ( cache_ptr->LRU_head_ptr == NULL )
+ ||
+ ( cache_ptr->LRU_tail_ptr == NULL )
+ )
+ &&
+ ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr )
+ ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 1 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 1 failed")
+ }
+
+ if ( ( cache_ptr->LRU_list_len < 0 ) || ( cache_ptr->LRU_list_size < 0 ) ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 2 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 2 failed")
+ }
+
+ if ( ( cache_ptr->LRU_list_len == 1 )
+ &&
+ ( ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr )
+ ||
+ ( cache_ptr->LRU_head_ptr == NULL )
+ ||
+ ( cache_ptr->LRU_head_ptr->size != cache_ptr->LRU_list_size )
+ )
+ ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 3 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 3 failed")
+ }
+
+ if ( ( cache_ptr->LRU_list_len >= 1 )
+ &&
+ ( ( cache_ptr->LRU_head_ptr == NULL )
+ ||
+ ( cache_ptr->LRU_head_ptr->prev != NULL )
+ ||
+ ( cache_ptr->LRU_tail_ptr == NULL )
+ ||
+ ( cache_ptr->LRU_tail_ptr->next != NULL )
+ )
+ ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 4 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 4 failed")
+ }
+
+ entry_ptr = cache_ptr->LRU_head_ptr;
+ while ( entry_ptr != NULL )
+ {
+
+ if ( ( entry_ptr != cache_ptr->LRU_head_ptr ) &&
+ ( ( entry_ptr->prev == NULL ) ||
+ ( entry_ptr->prev->next != entry_ptr ) ) ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 5 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 5 failed")
+ }
+
+ if ( ( entry_ptr != cache_ptr->LRU_tail_ptr ) &&
+ ( ( entry_ptr->next == NULL ) ||
+ ( entry_ptr->next->prev != entry_ptr ) ) ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 6 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 6 failed")
+ }
+
+ len++;
+ size += entry_ptr->size;
+ entry_ptr = entry_ptr->next;
+ }
+
+ if ( ( cache_ptr->LRU_list_len != len ) ||
+ ( cache_ptr->LRU_list_size != size ) ) {
+
+ HDfprintf(stdout,"H5C_validate_lru_list: Check 7 failed.\n");
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 7 failed")
+ }
+
+done:
+
+ if ( ret_value != SUCCEED ) {
+
+ HDassert(0);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_validate_lru_list() */
+
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_verify_not_in_index
+ *
+ * Purpose: Debugging function that scans the hash table to verify
+ * that the specified instance of H5C_cache_entry_t is not
+ * present.
+ *
+ * If an error is detected, the function generates a
+ * diagnostic and returns FAIL. If no error is detected,
+ * the function returns SUCCEED.
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: John Mainzer, 7/14/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if H5C_DO_EXTREME_SANITY_CHECKS
+
+static herr_t
+H5C_verify_not_in_index(H5C_t * cache_ptr,
+ H5C_cache_entry_t * entry_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ int32_t i;
+ int32_t depth;
+ H5C_cache_entry_t * scan_ptr = NULL;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5C_verify_not_in_index)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+ HDassert( entry_ptr != NULL );
+
+ for ( i = 0; i < H5C__HASH_TABLE_LEN; i++ )
+ {
+ depth = 0;
+ scan_ptr = cache_ptr->index[i];
+
+ while ( scan_ptr != NULL )
+ {
+ if ( scan_ptr == entry_ptr ) {
+
+ HDfprintf(stdout,
+ "H5C_verify_not_in_index: entry in index (%d/%d)\n",
+ i, depth);
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Entry already in index.")
+ }
+ depth++;
+ scan_ptr = scan_ptr->ht_next;
+ }
+ }
+
+done:
+
+ if ( ret_value != SUCCEED ) {
+
+ HDassert(0);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5C_verify_not_in_index() */
+
+#endif /* H5C_DO_EXTREME_SANITY_CHECKS */
diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h
index 54a8f48..007f30f 100644
--- a/src/H5Cpkg.h
+++ b/src/H5Cpkg.h
@@ -79,9 +79,18 @@
*
* JRM - 7/19/04
*
+ * The TBBT has since been replaced with a skip list. This change
+ * greatly predates this note.
+ *
+ * JRM - 9/26/05
+ *
* magic: Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC. This
* field is used to validate pointers to instances of H5C_t.
*
+ * aux_ptr: Pointer to void used to allow wrapper code to associate
+ * its data with an instance of H5C_t. The H5C cache code
+ * sets this field to NULL, and otherwise leaves it alone.
+ *
* max_type_id: Integer field containing the maximum type id number assigned
* to a type of entry in the cache. All type ids from 0 to
* max_type_id inclusive must be defined. The names of the
@@ -110,6 +119,10 @@
* will attempt to reduce its size to the max_cache_size
* limit on the next cache write.
*
+ * c) When an entry increases in size, the cache may exceed
+ * the max_cache_size limit until the next time the cache
+ * attempts to load or insert an entry.
+ *
* min_clean_size: Nominal minimum number of clean bytes in the cache.
* The cache attempts to maintain this number of bytes of
* clean data so as to avoid case b) above. Again, this is
@@ -126,7 +139,14 @@
* a write is permissible at any given point in time.
*
* If no such function is specified (i.e. this field is NULL),
- * the cache will presume that writes are always permissable.
+ * the cache uses the following write_permitted field to
+ * determine whether writes are permitted.
+ *
+ * write_permitted: If check_write_permitted is NULL, this boolean flag
+ * indicates whether writes are permitted.
+ *
+ * log_flush: If provided, this function is called whenever a dirty
+ * entry is flushed to disk.
*
*
* The cache requires an index to facilitate searching for entries. The
@@ -483,6 +503,16 @@
* id equal to the array index has been renamed in the current
* epoch.
*
+ * size_increases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ * The cells are used to record the number of times an entry
+ * with type id equal to the array index has increased in
+ * size in the current epoch.
+ *
+ * size_decreases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ * The cells are used to record the number of times an entry
+ * with type id equal to the array index has decreased in
+ * size in the current epoch.
+ *
* total_ht_insertions: Number of times entries have been inserted into the
* hash table in the current epoch.
*
@@ -580,6 +610,8 @@ struct H5C_t
{
uint32_t magic;
+ void * aux_ptr;
+
int32_t max_type_id;
const char * (* type_name_table_ptr);
@@ -587,6 +619,9 @@ struct H5C_t
size_t min_clean_size;
H5C_write_permitted_func_t check_write_permitted;
+ hbool_t write_permitted;
+
+ H5C_log_flush_func_t log_flush;
int32_t index_len;
size_t index_size;
@@ -646,6 +681,8 @@ struct H5C_t
int64_t flushes[H5C__MAX_NUM_TYPE_IDS + 1];
int64_t evictions[H5C__MAX_NUM_TYPE_IDS + 1];
int64_t renames[H5C__MAX_NUM_TYPE_IDS + 1];
+ int64_t size_increases[H5C__MAX_NUM_TYPE_IDS + 1];
+ int64_t size_decreases[H5C__MAX_NUM_TYPE_IDS + 1];
int64_t total_ht_insertions;
int64_t total_ht_deletions;
diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h
index 7c0151b..57d74af 100644
--- a/src/H5Cprivate.h
+++ b/src/H5Cprivate.h
@@ -36,6 +36,7 @@
#include "H5Fprivate.h" /* File access */
#define H5C_DO_SANITY_CHECKS 0
+#define H5C_DO_EXTREME_SANITY_CHECKS 0
/* This sanity checking constant was picked out of the air. Increase
 * or decrease it if appropriate. Its purpose is to detect corrupt
@@ -149,6 +150,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f,
hid_t dxpl_id,
hbool_t * write_permitted_ptr);
+typedef herr_t (*H5C_log_flush_func_t)(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id);
+
/* Upper and lower limits on cache size. These limits are picked
* out of a hat -- you should be able to change them as necessary.
*
@@ -180,8 +187,9 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f,
*
* In typical application, this structure is the first field in a
* structure to be cached. For historical reasons, the external module
- * is responsible for managing the is_dirty field. All other fields are
- * managed by the cache.
+ * is responsible for managing the is_dirty field (this is no longer
+ * completely true. See the comment on the is_dirty field for details).
+ * All other fields are managed by the cache.
*
* The fields of this structure are discussed individually below:
*
@@ -220,6 +228,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f,
* someday. However it will require a change in the
* cache interface.
*
+ * Update: Management of the is_dirty field has been largely
+ * moved into the cache. The only remaining exceptions
+ * are the flush and clear functions supplied by the
+ * modules using the cache. These still clear the
+ * is_dirty field as before. -- JRM 7/5/05
+ *
* is_protected: Boolean flag indicating whether this entry is protected
* (or locked, to use more conventional terms). When it is
* protected, the entry cannot be flushed or accessed until
@@ -239,6 +253,18 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f,
* H5AC__FLUSH_MARKED_ENTRIES_FLAG. The flag is reset when
* the entry is flushed for whatever reason.
*
+ * clear_on_unprotect: Boolean flag used only in PHDF5. When H5C is used
+ * to implement the metadata cache in the parallel case, only
+ * the cache with mpi rank 0 is allowed to actually write to
+ * file -- all other caches must retain dirty entries until they
+ * are advised that the entry is clean.
+ *
+ * This flag is used in the case that such an advisory is
+ * received when the entry is protected. If it is set when an
+ * entry is unprotected, and the dirtied flag is not set in
+ * the unprotect, the entry's is_dirty flag is reset by flushing
+ * it with the H5C__FLUSH_CLEAR_ONLY_FLAG.
+ *
*
* Fields supporting the hash table:
*
@@ -344,6 +370,9 @@ typedef struct H5C_cache_entry_t
hbool_t is_protected;
hbool_t in_slist;
hbool_t flush_marker;
+#ifdef H5_HAVE_PARALLEL
+ hbool_t clear_on_unprotect;
+#endif /* H5_HAVE_PARALLEL */
/* fields supporting the hash table: */
@@ -676,20 +705,24 @@ typedef struct H5C_auto_size_ctl_t
#define H5C__SET_FLUSH_MARKER_FLAG 0x0001
#define H5C__DELETED_FLAG 0x0002
-/* This flag applies only to H5C_unprotect() */
+/* These flags apply only to H5C_unprotect() */
#define H5C__DIRTIED_FLAG 0x0004
+#define H5C__SIZE_CHANGED_FLAG 0x0008
/* These flags apply to H5C_flush() & H5C_flush_single_entry() */
-#define H5C__FLUSH_INVALIDATE_FLAG 0x0008
-#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0010
-#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0020
+#define H5C__FLUSH_INVALIDATE_FLAG 0x0010
+#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0020
+#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0040
H5_DLL H5C_t * H5C_create(size_t max_cache_size,
size_t min_clean_size,
int max_type_id,
- const char * (* type_name_table_ptr),
- H5C_write_permitted_func_t check_write_permitted);
+ const char * (* type_name_table_ptr),
+ H5C_write_permitted_func_t check_write_permitted,
+ hbool_t write_permitted,
+ H5C_log_flush_func_t log_flush,
+ void * aux_ptr);
H5_DLL void H5C_def_auto_resize_rpt_fcn(H5C_t * cache_ptr,
int32_t version,
@@ -713,6 +746,11 @@ H5_DLL herr_t H5C_flush_cache(H5F_t * f,
H5C_t * cache_ptr,
unsigned flags);
+H5_DLL herr_t H5C_flush_to_min_clean(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr);
+
H5_DLL herr_t H5C_get_cache_auto_resize_config(H5C_t * cache_ptr,
H5C_auto_size_ctl_t *config_ptr);
@@ -725,6 +763,13 @@ H5_DLL herr_t H5C_get_cache_size(H5C_t * cache_ptr,
H5_DLL herr_t H5C_get_cache_hit_rate(H5C_t * cache_ptr,
double * hit_rate_ptr);
+H5_DLL herr_t H5C_get_entry_status(H5C_t * cache_ptr,
+ haddr_t addr,
+ size_t * size_ptr,
+ hbool_t * in_cache_ptr,
+ hbool_t * is_dirty_ptr,
+ hbool_t * is_protected_ptr);
+
H5_DLL herr_t H5C_insert_entry(H5F_t * f,
hid_t primary_dxpl_id,
hid_t secondary_dxpl_id,
@@ -734,6 +779,13 @@ H5_DLL herr_t H5C_insert_entry(H5F_t * f,
void * thing,
unsigned int flags);
+H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5C_t * cache_ptr,
+ int32_t ce_array_len,
+ haddr_t * ce_array_ptr);
+
H5_DLL herr_t H5C_rename_entry(H5C_t * cache_ptr,
const H5C_class_t * type,
haddr_t old_addr,
@@ -770,7 +822,8 @@ H5_DLL herr_t H5C_unprotect(H5F_t * f,
const H5C_class_t * type,
haddr_t addr,
void * thing,
- unsigned flags);
+ unsigned int flags,
+ size_t new_size);
H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t * config_ptr,
unsigned int tests);
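
The revised H5C_create() prototype above now takes the write_permitted flag, the log_flush callback, and aux_ptr in addition to the old arguments. Below is a minimal sketch of how a wrapper might supply them; the names my_log_flush, my_aux, and type_name_table are hypothetical and not part of this patch, and the callback body is intentionally empty.

static herr_t
my_log_flush(H5C_t *cache_ptr, haddr_t addr, hbool_t was_dirty,
             unsigned flags, int type_id)
{
    /* a wrapper would record the flush here, typically in the
     * bookkeeping structure it registered through aux_ptr
     */
    return(SUCCEED);
}

cache_ptr = H5C_create(max_cache_size,   /* e.g. 4 * 1024 * 1024        */
                       min_clean_size,   /* e.g. 1 * 1024 * 1024        */
                       max_type_id,      /* highest entry type id       */
                       type_name_table,  /* one name per type id        */
                       NULL,             /* no check_write_permitted... */
                       TRUE,             /* ...so write_permitted rules */
                       my_log_flush,     /* flush logging callback      */
                       (void *)my_aux);  /* wrapper-private data        */
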
diff --git a/src/H5D.c b/src/H5D.c
index 1063499..400d41e 100644
--- a/src/H5D.c
+++ b/src/H5D.c
@@ -2034,7 +2034,7 @@ H5D_update_entry_info(H5F_t *file, hid_t dxpl_id, H5D_t *dset, H5P_genplist_t *p
#endif /* H5O_ENABLE_BOGUS */
/* Add a modification time message. */
- if (H5O_touch_oh(file, oh, TRUE, &oh_flags) < 0)
+ if (H5O_touch_oh(file, dxpl_id, oh, TRUE, &oh_flags) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to update modification time message")
done:
diff --git a/src/H5F.c b/src/H5F.c
index f9c1784..cd75a2f 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -67,7 +67,8 @@ static int H5F_flush_all_cb(void *f, hid_t fid, void *_invalidate);
static unsigned H5F_get_objects(const H5F_t *f, unsigned types, int max_objs, hid_t *obj_id_list);
static int H5F_get_objects_cb(void *obj_ptr, hid_t obj_id, void *key);
static herr_t H5F_get_vfd_handle(const H5F_t *file, hid_t fapl, void** file_handle);
-static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id);
+static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id,
+ H5FD_t *lf);
static herr_t H5F_dest(H5F_t *f, hid_t dxpl_id);
static herr_t H5F_flush(H5F_t *f, hid_t dxpl_id, H5F_scope_t scope, unsigned flags);
static herr_t H5F_close(H5F_t *f);
@@ -1426,10 +1427,16 @@ done:
* Updated for the new metadata cache, and associated
* property list changes.
*
+ * J Mainzer, Jun 30, 2005
+ * Added lf parameter so the shared->lf field can be
+ * initialized prior to the call to H5AC_create() if a
+ * new instance of H5F_file_t is created. lf should be
+ * NULL if shared isn't, and vice versa.
+ *
*-------------------------------------------------------------------------
*/
static H5F_t *
-H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id)
+H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf)
{
H5F_t *f=NULL, *ret_value;
H5P_genplist_t *plist; /* Property list */
@@ -1441,14 +1448,17 @@ H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id)
f->file_id = -1;
if (shared) {
+ HDassert( lf == NULL );
f->shared = shared;
} else {
+ HDassert( lf != NULL );
f->shared = H5FL_CALLOC(H5F_file_t);
f->shared->super_addr = HADDR_UNDEF;
f->shared->base_addr = HADDR_UNDEF;
f->shared->freespace_addr = HADDR_UNDEF;
f->shared->driver_addr = HADDR_UNDEF;
-
+ f->shared->lf = lf;
+
/*
* Copy the file creation and file access property lists into the
* new file handle. We do this early because some values might need
@@ -1803,10 +1813,10 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "file is already open for read-only")
/* Allocate new "high-level" file struct */
- if ((file = H5F_new(shared, fcpl_id, fapl_id)) == NULL)
+ if ((file = H5F_new(shared, fcpl_id, fapl_id, NULL)) == NULL)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object")
- lf = file->shared->lf;
+ lf = file->shared->lf;
} else if (flags!=tent_flags) {
/*
* This file is not yet open by the library and the flags we used to
@@ -1821,20 +1831,18 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d
file = NULL; /*to prevent destruction of wrong file*/
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file")
}
- if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id)))
+ if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf)))
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object")
file->shared->flags = flags;
- file->shared->lf = lf;
} else {
/*
* This file is not yet open by the library and our tentative opening
* above is good enough.
*/
- if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id)))
+ if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf)))
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object")
file->shared->flags = flags;
- file->shared->lf = lf;
}
/* Short cuts */
@@ -2841,7 +2849,7 @@ H5Freopen(hid_t file_id)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file")
/* Get a new "top level" file struct, sharing the same "low level" file struct */
- if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT)))
+ if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT, NULL)))
HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "unable to reopen file")
/* Keep old file's read/write intent in new file */
@@ -3389,6 +3397,40 @@ H5F_mpi_get_comm(const H5F_t *f)
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_mpi_get_comm() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5F_mpi_get_size
+ *
+ * Purpose: Retrieves the size of the file's MPI communicator.
+ *
+ * Return: Success: The size (positive)
+ *
+ * Failure: Negative
+ *
+ * Programmer: John Mainzer
+ * Friday, May 6, 2005
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+int
+H5F_mpi_get_size(const H5F_t *f)
+{
+ int ret_value;
+
+ FUNC_ENTER_NOAPI(H5F_mpi_get_size, FAIL)
+
+ assert(f && f->shared);
+
+ /* Dispatch to driver */
+ if ((ret_value=H5FD_mpi_get_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_size request failed")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5F_mpi_get_size() */
#endif /* H5_HAVE_PARALLEL */
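
A brief sketch of the intended use of the new call (illustrative only, not taken from this patch): paired with the existing H5F_mpi_get_rank(), it lets cache code decide whether the current process is the sole metadata writer and how many peers must be told about cleaned entries.

#ifdef H5_HAVE_PARALLEL
    int mpi_rank = H5F_mpi_get_rank(f);
    int mpi_size = H5F_mpi_get_size(f);

    if ( ( mpi_rank < 0 ) || ( mpi_size < 1 ) )
        HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI rank/size")

    if ( mpi_rank == 0 ) {
        /* only this process is permitted to write metadata to the file */
    }
#endif /* H5_HAVE_PARALLEL */
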
diff --git a/src/H5FD.c b/src/H5FD.c
index 8d152ec..b40b7b0 100644
--- a/src/H5FD.c
+++ b/src/H5FD.c
@@ -2328,7 +2328,7 @@ H5FD_free(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si
H5_ASSIGN_OVERFLOW(overlap_size,(file->accum_loc+file->accum_size)-addr,haddr_t,size_t);
/* Block to free is in the middle of the accumulator */
- if(H5F_addr_lt(addr,file->accum_loc+file->accum_size)) {
+ if(H5F_addr_lt((addr + size), file->accum_loc + file->accum_size)) {
haddr_t tail_addr;
size_t tail_size;
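
The one-line change to H5FD_free() above tightens the "middle of the accumulator" test: the tail-fragment branch should be taken only when the end of the freed block, not merely its start, lies inside the accumulator. A worked example with invented values:

    /* accumulator covers [100, 200); free the block [150, 200)         */
    /*   old test: H5F_addr_lt(addr, accum_loc + accum_size)            */
    /*             -> 150 < 200 -> TRUE, tail branch taken although no  */
    /*                bytes remain after the freed block                */
    /*   new test: H5F_addr_lt(addr + size, accum_loc + accum_size)     */
    /*             -> 200 < 200 -> FALSE, tail branch correctly skipped */
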
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index f285f1a..3cf1968 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -934,6 +934,13 @@ done:
*
* Modifications:
*
+ * John Mainzer -- 9/21/05
+ * Modified code to turn off the
+ * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag. With the
+ * movement of all cache writes to process 0, this flag
+ * has become problematic in PHDF5.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -947,15 +954,6 @@ H5FD_mpio_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */)
if(flags) {
*flags=0;
*flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
-
- /* Distinguish between updating the metadata accumulator on writes and
- * reads. This is particularly (perhaps only, even) important for MPI-I/O
- * where we guarantee that writes are collective, but reads may not be.
- * If we were to allow the metadata accumulator to be written during a
- * read operation, the application would hang.
- */
- *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
-
*flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
} /* end if */
@@ -1553,9 +1551,18 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property")
+#if 0 /* JRM */
+ /* The metadata cache now only writes from process 0, which makes
+ * this synchronization incorrect. I'm leaving this code commented
+ * out instead of deleting it to remind us that we should re-write
+ * this function so that a metadata write from any other process
+ * flags an error.
+ * -- JRM 9/1/05
+ */
if(block_before_meta_write)
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+#endif /* JRM */
/* Only one process will do the actual write if all procs in comm write same metadata */
if (file->mpi_rank != H5_PAR_META_WRITE) {
@@ -1616,11 +1623,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
file->eof = HADDR_UNDEF;
done:
- /* if only one process writes, need to broadcast the ret_value to other processes */
+
+#if 0 /* JRM */
+ /* Since metadata writes are now done by process 0 only, this broadcast
+ * is no longer needed. I leave it in and commented out to remind us
+ * that we need to re-work this function to reflect this reality.
+ *
+ * -- JRM 9/1/05
+ */
+ /* if only one process writes, need to broadcast the ret_value to
+ * other processes
+ */
if (type!=H5FD_MEM_DRAW) {
if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
} /* end if */
+#endif /* JRM */
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c
index 11e7849..de491f0 100644
--- a/src/H5FDmpiposix.c
+++ b/src/H5FDmpiposix.c
@@ -910,6 +910,12 @@ done:
*
* Modifications:
*
+ * John Mainzer -- 9/21/05
+ * Modified code to turn off the
+ * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag.
+ * With the movement of all cache writes to process 0,
+ * this flag has become problematic in PHDF5.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -923,15 +929,6 @@ H5FD_mpiposix_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */)
if(flags) {
*flags=0;
*flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
-
- /* Distinguish between updating the metadata accumulator on writes and
- * reads. This is particularly (perhaps only, even) important for MPI-I/O
- * where we guarantee that writes are collective, but reads may not be.
- * If we were to allow the metadata accumulator to be written during a
- * read operation, the application would hang.
- */
- *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
-
*flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
} /* end if */
@@ -1235,6 +1232,14 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list")
/* Metadata specific actions */
+ /* All metadata is now written from process 0 -- thus this function
+ * needs to be re-written to reflect this. For now I have simply
+ * commented out the code that attempts to synchronize metadata
+ * writes between processes, but we should really just flag an error
+ * whenever any process other than process 0 attempts to write
+ * metadata.
+ * -- JRM 9/1/05
+ */
if(type!=H5FD_MEM_DRAW) {
unsigned block_before_meta_write=0; /* Whether to block before a metadata write */
@@ -1252,9 +1257,11 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property")
+#if 0 /* JRM */
if(block_before_meta_write)
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+#endif /* JRM */
/* Only one process will do the actual write if all procs in comm write same metadata */
if (file->mpi_rank != H5_PAR_META_WRITE)
@@ -1328,6 +1335,14 @@ done:
file->pos = HADDR_UNDEF;
file->op = OP_UNKNOWN;
} /* end if */
+#if 0 /* JRM */
+ /* Since metadata writes are now done by process 0 only, this broadcast
+ * is no longer needed. I leave it in and commented out to remind us
+ * that we need to re-work this function to reflect this reality.
+ *
+ * -- JRM 9/1/05
+ */
+
/* Guard against getting into metadata broadcast in failure cases */
else {
/* when only one process writes, need to broadcast the ret_value to other processes */
@@ -1336,6 +1351,7 @@ done:
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
} /* end if */
} /* end else */
+#endif /* JRM */
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_mpiposix_write() */
diff --git a/src/H5FDmulti.c b/src/H5FDmulti.c
index 71b5c91..a1a065e 100644
--- a/src/H5FDmulti.c
+++ b/src/H5FDmulti.c
@@ -1628,7 +1628,14 @@ H5FD_multi_alloc(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size)
if (HADDR_UNDEF==(addr=H5FDalloc(file->memb[mmt], type, dxpl_id, size)))
H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, "member file can't alloc", HADDR_UNDEF)
addr += file->fa.memb_addr[mmt];
- if (addr+size>file->eoa) file->eoa = addr+size;
+ if ( addr + size > file->eoa ) {
+
+ if ( H5FD_multi_set_eoa(_file, addr + size) < 0 ) {
+
+ H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, \
+ "can't set eoa", HADDR_UNDEF)
+ }
+ }
return addr;
}
diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h
index b9d9d74..53d3f05 100644
--- a/src/H5Fprivate.h
+++ b/src/H5Fprivate.h
@@ -448,6 +448,7 @@ H5_DLL haddr_t H5F_get_eoa(const H5F_t *f);
#ifdef H5_HAVE_PARALLEL
H5_DLL int H5F_mpi_get_rank(const H5F_t *f);
H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f);
+H5_DLL int H5F_mpi_get_size(const H5F_t *f);
#endif /* H5_HAVE_PARALLEL */
/* Functions than check file mounting information */
diff --git a/src/H5HL.c b/src/H5HL.c
index ee31ecc..9de9f5b 100644
--- a/src/H5HL.c
+++ b/src/H5HL.c
@@ -155,7 +155,6 @@ H5HL_create(H5F_t *f, hid_t dxpl_id, size_t size_hint, haddr_t *addr_p/*out*/)
heap->addr = *addr_p + (hsize_t)sizeof_hdr;
heap->disk_alloc = size_hint;
heap->mem_alloc = size_hint;
- heap->disk_resrv = 0;
if (NULL==(heap->chunk = H5FL_BLK_CALLOC(heap_chunk,(sizeof_hdr + size_hint))))
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed");
@@ -320,6 +319,20 @@ done:
*
* Modifications:
*
+ * John Mainzer, 8/10/05
+ * Reworked this function for a different role.
+ *
+ * It used to be called during cache eviction, where it
+ * attempted to size the disk space allocation for the
+ * actual size of the heap. However, this causes problems
+ * in the parallel case, as the resulting disk allocations
+ * may not be synchronized.
+ *
+ * It is now called from H5HL_remove(), where it is used to
+ * reduce heap size in response to an entry deletion. This
+ * means that the function should either do nothing, or
+ * reduce the size of the disk allocation.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -331,19 +344,13 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap)
FUNC_ENTER_NOAPI(H5HL_minimize_heap_space, FAIL)
/* check args */
- assert(f);
- assert(heap);
+ HDassert( f );
+ HDassert( heap );
+ HDassert( heap->disk_alloc == heap->mem_alloc );
sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */
/*
- * When the heap is being flushed to disk, release the file space reserved
- * for it.
- */
- H5MF_free_reserved(f, (hsize_t)heap->disk_resrv);
- heap->disk_resrv = 0;
-
- /*
* Check to see if we can reduce the size of the heap in memory by
* eliminating free blocks at the tail of the buffer before flushing the
* buffer out.
@@ -427,13 +434,15 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap)
}
/*
- * If the heap grew larger or smaller than disk storage then move the
+ * If the heap grew smaller than disk storage then move the
* data segment of the heap to another contiguous block of disk
* storage.
*/
if (heap->mem_alloc != heap->disk_alloc) {
haddr_t old_addr = heap->addr, new_addr;
+ HDassert( heap->mem_alloc < heap->disk_alloc );
+
/* Release old space on disk */
H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t);
H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr, (hsize_t)heap->disk_alloc);
@@ -453,7 +462,8 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap)
done:
FUNC_LEAVE_NOAPI(ret_value)
-}
+
+} /* H5HL_minimize_heap_space() */
/*-------------------------------------------------------------------------
@@ -543,6 +553,13 @@ H5HL_serialize(H5F_t *f, H5HL_t *heap, uint8_t *buf)
*
* Bill Wendling, 2003-09-16
* Separated out the bit that serializes the heap.
+ *
+ * John Mainzer, 2005-08-10
+ * Removed call to H5HL_minimize_heap_space(). It does disk space
+ * allocation, which can cause problems if done at flush time.
+ * Instead, disk space allocation/deallocation is now done at
+ * insert/remove time.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -553,21 +570,18 @@ H5HL_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5HL_t *heap)
FUNC_ENTER_NOAPI(H5HL_flush, FAIL);
/* check arguments */
- assert(f);
- assert(H5F_addr_defined(addr));
- assert(heap);
+ HDassert( f );
+ HDassert( H5F_addr_defined(addr) );
+ HDassert( heap );
+ HDassert( heap->disk_alloc == heap->mem_alloc );
if (heap->cache_info.is_dirty) {
haddr_t hdr_end_addr;
size_t sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */
- /* Minimize the heap space size if possible */
- if (H5HL_minimize_heap_space(f, dxpl_id, heap) < 0)
- HGOTO_ERROR(H5E_HEAP, H5E_CANTFREE, FAIL, "unable to minimize local heap space")
-
/* Write the header */
if (H5HL_serialize(f, heap, heap->chunk) < 0)
- HGOTO_ERROR(H5E_BTREE, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap")
+ HGOTO_ERROR(H5E_HEAP, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap")
/* Copy buffer to disk */
hdr_end_addr = addr + (hsize_t)sizeof_hdr;
@@ -951,6 +965,10 @@ H5HL_remove_free(H5HL_t *heap, H5HL_free_t *fl)
* H5AC_unprotect() instead of manipulating the is_dirty
* field of the cache info directly.
*
+ * John Mainzer, 8/10/05
+ * Modified code to allocate file space as needed, instead
+ * of allocating it on eviction.
+ *
*-------------------------------------------------------------------------
*/
size_t
@@ -959,10 +977,12 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void *
H5HL_t *heap = NULL;
unsigned heap_flags = H5AC__NO_FLAGS_SET;
H5HL_free_t *fl = NULL, *max_fl = NULL;
+ htri_t tri_result;
+ herr_t result;
size_t offset = 0;
size_t need_size, old_size, need_more;
+ size_t new_disk_alloc;
hbool_t found;
- size_t disk_resrv; /* Amount of additional space to reserve in file */
size_t sizeof_hdr; /* Cache H5HL header size for file */
size_t ret_value; /* Return value */
@@ -1028,18 +1048,54 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void *
if (found==FALSE) {
need_more = MAX3(need_size, heap->mem_alloc, H5HL_SIZEOF_FREE(f));
- /* Reserve space in the file to hold the increased heap size
- */
- if( heap->disk_resrv == heap->mem_alloc)
- disk_resrv = need_more;
- else
- disk_resrv = heap->mem_alloc + need_more - heap->disk_resrv;
+ new_disk_alloc = heap->disk_alloc + need_more;
+ HDassert( heap->disk_alloc < new_disk_alloc );
+ H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t);
+ H5_CHECK_OVERFLOW(new_disk_alloc, size_t, hsize_t);
+
+ /* extend the current heap if we can... */
+ tri_result = H5MF_can_extend(f, H5FD_MEM_LHEAP, heap->addr,
+ (hsize_t)(heap->disk_alloc),
+ (hsize_t)need_more);
+ if ( tri_result == TRUE ) {
+
+ result = H5MF_extend(f, H5FD_MEM_LHEAP, heap->addr,
+ (hsize_t)(heap->disk_alloc),
+ (hsize_t)need_more);
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \
+ "can't extend heap on disk");
+ }
+
+ heap->disk_alloc = new_disk_alloc;
+
+ } else { /* ...if we can't, allocate a new chunk & release the old */
+
+ haddr_t old_addr = heap->addr;
+ haddr_t new_addr;
+
+ * The new allocation may fail -- to avoid the possibility of
+ * file corruption, allocate the new heap first, and then
+ * deallocate the old.
+ */
+
+ /* allocate new disk space for the heap */
+ if ( (new_addr = H5MF_alloc(f, H5FD_MEM_LHEAP, dxpl_id,
+ (hsize_t)new_disk_alloc)) == HADDR_UNDEF ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \
+ "unable to allocate file space for heap")
+ }
- if( H5MF_reserve(f, (hsize_t)disk_resrv) < 0 )
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), "unable to reserve space in file");
+ /* Release old space on disk */
+ H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr,
+ (hsize_t)heap->disk_alloc);
+ H5E_clear_stack(NULL); /* don't really care if the free failed */
- /* Update heap's record of how much space it has reserved */
- heap->disk_resrv += disk_resrv;
+ heap->addr = new_addr;
+ heap->disk_alloc = new_disk_alloc;
+ }
if (max_fl && max_fl->offset + max_fl->size == heap->mem_alloc) {
/*
@@ -1117,7 +1173,8 @@ done:
HDONE_ERROR(H5E_HEAP, H5E_PROTECT, (size_t)(-1), "unable to release object header");
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5HL_insert() */
#ifdef NOT_YET
@@ -1217,6 +1274,11 @@ done:
* H5AC_unprotect() instead of manipulating the is_dirty
* field of the cache info directly.
*
+ * John Mainzer, 8/10/05
+ * Modified code to attempt to decrease heap size if the
+ * entry removal results in a free list entry at the end
+ * of the heap that is at least half the size of the heap.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -1225,15 +1287,15 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size)
H5HL_t *heap = NULL;
unsigned heap_flags = H5AC__NO_FLAGS_SET;
H5HL_free_t *fl = NULL, *fl2 = NULL;
- herr_t ret_value=SUCCEED; /* Return value */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5HL_remove, FAIL);
/* check arguments */
- assert(f);
- assert(H5F_addr_defined(addr));
- assert(size > 0);
- assert (offset==H5HL_ALIGN (offset));
+ HDassert( f );
+ HDassert( H5F_addr_defined(addr) );
+ HDassert( size > 0 );
+ HDassert( offset == H5HL_ALIGN(offset) );
if (0==(f->intent & H5F_ACC_RDWR))
HGOTO_ERROR (H5E_HEAP, H5E_WRITEERROR, FAIL, "no write intent on file");
@@ -1243,8 +1305,9 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size)
if (NULL == (heap = H5AC_protect(f, dxpl_id, H5AC_LHEAP, addr, NULL, NULL, H5AC_WRITE)))
HGOTO_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to load heap");
- assert(offset < heap->mem_alloc);
- assert(offset + size <= heap->mem_alloc);
+ HDassert( offset < heap->mem_alloc );
+ HDassert( offset + size <= heap->mem_alloc );
+ HDassert( heap->disk_alloc == heap->mem_alloc );
fl = heap->freelist;
heap_flags |= H5AC__DIRTIED_FLAG;
@@ -1268,10 +1331,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size)
assert (fl->offset==H5HL_ALIGN (fl->offset));
assert (fl->size==H5HL_ALIGN (fl->size));
fl2 = H5HL_remove_free(heap, fl2);
+ if ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
+ ( (2 * fl->size) > heap->mem_alloc ) ) {
+
+ if ( H5HL_minimize_heap_space(f, dxpl_id, heap) !=
+ SUCCEED ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \
+ "heap size minimization failed");
+ }
+ }
HGOTO_DONE(SUCCEED);
}
fl2 = fl2->next;
}
+ if ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
+ ( (2 * fl->size) > heap->mem_alloc ) ) {
+
+ if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \
+ "heap size minimization failed");
+ }
+ }
HGOTO_DONE(SUCCEED);
} else if (fl->offset + fl->size == offset) {
@@ -1283,10 +1365,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size)
fl->size += fl2->size;
assert (fl->size==H5HL_ALIGN (fl->size));
fl2 = H5HL_remove_free(heap, fl2);
+ if ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
+ ( (2 * fl->size) > heap->mem_alloc ) ) {
+
+ if ( H5HL_minimize_heap_space(f, dxpl_id, heap) !=
+ SUCCEED ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \
+ "heap size minimization failed");
+ }
+ }
HGOTO_DONE(SUCCEED);
}
fl2 = fl2->next;
}
+ if ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
+ ( (2 * fl->size) > heap->mem_alloc ) ) {
+
+ if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \
+ "heap size minimization failed");
+ }
+ }
HGOTO_DONE(SUCCEED);
}
fl = fl->next;
@@ -1321,6 +1422,16 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size)
heap->freelist->prev = fl;
heap->freelist = fl;
+ if ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
+ ( (2 * fl->size) > heap->mem_alloc ) ) {
+
+ if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \
+ "heap size minimization failed");
+ }
+ }
+
done:
if (heap && H5AC_unprotect(f, dxpl_id, H5AC_LHEAP, addr, heap, heap_flags) != SUCCEED)
HDONE_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to release object header");
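
The shrink test added at each return path of H5HL_remove() above is the same expression every time. Factored into a hypothetical helper purely as a reading aid (the patch itself repeats the test inline):

/* shrink the heap when the free block just created or merged sits at
 * the very end of the heap and covers more than half of it
 */
static hbool_t
H5HL_should_shrink(const H5HL_t *heap, const H5HL_free_t *fl)
{
    return ( ( (fl->offset + fl->size) == heap->mem_alloc ) &&
             ( (2 * fl->size) > heap->mem_alloc ) );
}
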
diff --git a/src/H5HLpkg.h b/src/H5HLpkg.h
index 1a61866..8b099cc 100644
--- a/src/H5HLpkg.h
+++ b/src/H5HLpkg.h
@@ -67,7 +67,6 @@ struct H5HL_t {
haddr_t addr; /*address of data */
size_t disk_alloc; /*data bytes allocated on disk */
size_t mem_alloc; /*data bytes allocated in mem */
- size_t disk_resrv; /*data bytes "reserved" on disk */
uint8_t *chunk; /*the chunk, including header */
H5HL_free_t *freelist; /*the free list */
};
diff --git a/src/H5MF.c b/src/H5MF.c
index f4ce055..6f68027 100644
--- a/src/H5MF.c
+++ b/src/H5MF.c
@@ -399,9 +399,9 @@ done:
*
* Purpose: Extend a block in the file.
*
- * Return: Success: TRUE(1)/FALSE(0)
+ * Return: Success: Non-negative
*
- * Failure: FAIL
+ * Failure: Negative
*
* Programmer: Quincey Koziol
* Saturday, June 12, 2004
@@ -410,10 +410,10 @@ done:
*
*-------------------------------------------------------------------------
*/
-htri_t
+herr_t
H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested)
{
- htri_t ret_value; /* Return value */
+ herr_t ret_value; /* Return value */
FUNC_ENTER_NOAPI(H5MF_extend, FAIL);
diff --git a/src/H5MFprivate.h b/src/H5MFprivate.h
index ee35c2a..01b8204 100644
--- a/src/H5MFprivate.h
+++ b/src/H5MFprivate.h
@@ -53,7 +53,7 @@ H5_DLL herr_t H5MF_free_reserved(H5F_t *f, hsize_t size);
H5_DLL hbool_t H5MF_alloc_overflow(H5F_t *f, hsize_t size);
H5_DLL htri_t H5MF_can_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr,
hsize_t size, hsize_t extra_requested);
-H5_DLL htri_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size,
+H5_DLL herr_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size,
hsize_t extra_requested);
#endif
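
With H5MF_extend() now returning herr_t, the callers touched by this patch (H5HL_insert() and H5O_alloc_extend_chunk()) share the same probe-then-extend pattern. A condensed sketch of that pattern follows; type, addr, old_size, and extra_requested stand in for the caller's values, and the error strings are shortened.

    htri_t can_extend;

    can_extend = H5MF_can_extend(f, type, addr, (hsize_t)old_size,
                                 (hsize_t)extra_requested);
    if ( can_extend == TRUE ) {

        /* in-place extension is possible -- a failure here is an error */
        if ( H5MF_extend(f, type, addr, (hsize_t)old_size,
                         (hsize_t)extra_requested) < 0 )
            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "can't extend block")

    } else if ( can_extend == FALSE ) {

        /* can't grow in place -- allocate a new block (and, for the local
         * heap, free the old one), or report that no extension happened
         */

    } else {

        HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, "H5MF_can_extend() failed")
    }
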
diff --git a/src/H5O.c b/src/H5O.c
index c1d8d46..81da26f 100644
--- a/src/H5O.c
+++ b/src/H5O.c
@@ -89,10 +89,12 @@ static int H5O_append_real(H5F_t *f, hid_t dxpl_id, H5O_t *oh,
unsigned * oh_flags_ptr);
static herr_t H5O_remove_real(H5G_entry_t *ent, const H5O_class_t *type,
int sequence, H5O_operator_t op, void *op_data, hbool_t adj_link, hid_t dxpl_id);
-static unsigned H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type,
- size_t size, unsigned * oh_flags_ptr);
-static unsigned H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size);
-static unsigned H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size);
+static unsigned H5O_alloc(H5F_t *f, hid_t dxpl_id, H5O_t *oh,
+ const H5O_class_t *type, size_t size, hbool_t * oh_dirtied_ptr);
+static htri_t H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh,
+ unsigned chunkno, size_t size, unsigned * msg_idx);
+static unsigned H5O_alloc_new_chunk(H5F_t *f, hid_t dxpl_id, H5O_t *oh,
+ size_t size);
static herr_t H5O_delete_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh);
static herr_t H5O_delete_mesg(H5F_t *f, hid_t dxpl_id, H5O_mesg_t *mesg,
hbool_t adj_link);
@@ -582,8 +584,12 @@ H5O_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void UNUSED * _udata1,
p += 3; /*reserved*/
/* Try to detect invalidly formatted object header messages */
- if (p + mesg_size > oh->chunk[chunkno].image + chunk_size)
- HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, "corrupt object header");
+ if (p + mesg_size > oh->chunk[chunkno].image + chunk_size) {
+
+ HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, \
+ "corrupt object header");
+ }
+
/* Skip header messages we don't know about */
/* (Usually from future versions of the library */
@@ -740,17 +746,12 @@ H5O_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5O_t *oh)
/* allocate file space for chunks that have none yet */
if (H5O_CONT_ID == curr_msg->type->id &&
!H5F_addr_defined(((H5O_cont_t *)(curr_msg->native))->addr)) {
- cont = (H5O_cont_t *) (curr_msg->native);
- assert(cont->chunkno < oh->nchunks);
- assert(!H5F_addr_defined(oh->chunk[cont->chunkno].addr));
- cont->size = oh->chunk[cont->chunkno].size;
-
- /* Free the space we'd reserved in the file to hold this chunk */
- H5MF_free_reserved(f, (hsize_t)cont->size);
-
- if (HADDR_UNDEF==(cont->addr=H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)cont->size)))
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate space for object header data");
- oh->chunk[cont->chunkno].addr = cont->addr;
+ /* We now allocate disk space on insertion, instead
+ * of on flush from the cache, so this case is now an
+ * error. -- JRM
+ */
+ HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, FAIL,
+ "File space for message not allocated!?!");
}
/*
@@ -1972,7 +1973,7 @@ H5O_modify_real(H5G_entry_t *ent, const H5O_class_t *type, int overwrite,
/* Update the modification time message if any */
if(update_flags&H5O_UPDATE_TIME)
- H5O_touch_oh(ent->file, oh, FALSE, &oh_flags);
+ H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags);
/* Set return value */
ret_value = sequence;
@@ -2258,7 +2259,8 @@ H5O_new_mesg(H5F_t *f, H5O_t *oh, unsigned *flags, const H5O_class_t *orig_type,
HGOTO_ERROR (H5E_OHDR, H5E_CANTINIT, UFAIL, "object header message is too large (16k max)");
/* Allocate space in the object headed for the message */
- if ((ret_value = H5O_alloc(f, oh, orig_type, size, oh_flags_ptr)) == UFAIL)
+ if ((ret_value = H5O_alloc(f, dxpl_id, oh,
+ orig_type, size, oh_flags_ptr)) == UFAIL)
HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, UFAIL, "unable to allocate space for message");
/* Increment any links in message */
@@ -2346,10 +2348,18 @@ done:
* In this case, that requires the addition of the oh_dirtied_ptr
* parameter to track whether *oh is dirty.
*
+ * John Mainzer, 8/20/05
+ * Added dxpl_id parameter needed by the revised version of
+ * H5O_alloc().
+ *
*-------------------------------------------------------------------------
*/
herr_t
-H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr)
+H5O_touch_oh(H5F_t *f,
+ hid_t dxpl_id,
+ H5O_t *oh,
+ hbool_t force,
+ unsigned * oh_flags_ptr)
{
unsigned idx;
#ifdef H5_HAVE_GETTIMEOFDAY
@@ -2382,7 +2392,9 @@ H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr)
if (!force)
HGOTO_DONE(SUCCEED); /*nothing to do*/
size = (H5O_MTIME_NEW->raw_size)(f, &now);
- if ((idx=H5O_alloc(f, oh, H5O_MTIME_NEW, size, oh_flags_ptr))==UFAIL)
+
+ if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_MTIME_NEW,
+ size, oh_flags_ptr))==UFAIL)
HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for modification time message");
}
@@ -2442,7 +2454,7 @@ H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id)
HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header");
/* Create/Update the modification time message */
- if (H5O_touch_oh(ent->file, oh, force, &oh_flags)<0)
+ if (H5O_touch_oh(ent->file, dxpl_id, oh, force, &oh_flags)<0)
HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object modificaton time");
done:
@@ -2473,10 +2485,13 @@ done:
* In this case, that requires the addition of the oh_dirtied_ptr
* parameter to track whether *oh is dirty.
*
+ * John Mainzer, 8/20/05
+ * Added dxpl_id parameter needed by call to H5O_alloc().
+ *
*-------------------------------------------------------------------------
*/
herr_t
-H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr)
+H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh, hbool_t * oh_flags_ptr)
{
int idx;
size_t size;
@@ -2496,7 +2511,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr)
/* Create a new message */
if (idx==oh->nmesgs) {
size = (H5O_BOGUS->raw_size)(f, NULL);
- if ((idx=H5O_alloc(f, oh, H5O_BOGUS, size, oh_flags_ptr))<0)
+ if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_BOGUS, size, oh_flags_ptr))<0)
HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for 'bogus' message");
/* Allocate the native message in memory */
@@ -2507,6 +2522,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr)
((H5O_bogus_t *)(oh->mesg[idx].native))->u = H5O_BOGUS_VALUE;
/* Mark the message and object header as dirty */
+ *oh_flags_ptr = TRUE;
oh->mesg[idx].dirty = TRUE;
oh->dirty = TRUE;
} /* end if */
@@ -2558,7 +2574,7 @@ H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id)
HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header");
/* Create the "bogus" message */
- if (H5O_bogus_oh(ent->file, oh, &oh_flags)<0)
+ if (H5O_bogus_oh(ent->file, dxpl_id, oh, &oh_flags)<0)
HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object 'bogus' message");
done:
@@ -2816,71 +2832,115 @@ done:
} /* end H5O_remove_real() */
-/*-------------------------------------------------------------------------
- * Function: H5O_alloc_extend_chunk
+/*-------------------------------------------------------------------------
*
- * Purpose: Extends a chunk which hasn't been allocated on disk yet
- * to make the chunk large enough to contain a message whose
- * data size is exactly SIZE bytes (SIZE need not be aligned).
+ * Function: H5O_alloc_extend_chunk
*
- * If the last message of the chunk is the null message, then
- * that message will be extended with the chunk. Otherwise a
- * new null message is created.
+ * Purpose: Attempt to extend a chunk that is allocated on disk.
*
- * F is the file in which the chunk will be written. It is
- * included to ensure that there is enough space to extend
- * this chunk.
+ * If the extension is successful, and if the last message
+ * of the chunk is the null message, then that message will
+ * be extended with the chunk. Otherwise a new null message
+ * is created.
*
- * Return: Success: Message index for null message which
- * is large enough to hold SIZE bytes.
+ * f is the file in which the chunk will be written. It is
+ * included to ensure that there is enough space to extend
+ * this chunk.
*
- * Failure: Negative
+ * Return: TRUE: The chunk has been extended, and *msg_idx
+ * contains the message index for a null message
+ * which is large enough to hold size bytes.
*
- * Programmer: Robb Matzke
- * matzke@llnl.gov
- * Aug 7 1997
+ * FALSE: The chunk cannot be extended, and *msg_idx
+ * is undefined.
+ *
+ * FAIL: Some internal error has been detected.
+ *
+ * Programmer: John Mainzer -- 8/16/05
*
* Modifications:
- * Robb Matzke, 1999-08-26
- * If new memory is allocated as a multiple of some alignment
- * then we're careful to initialize the part of the new memory
- * from the end of the expected message to the end of the new
- * memory.
+ *
*-------------------------------------------------------------------------
*/
-static unsigned
-H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size)
+static htri_t
+H5O_alloc_extend_chunk(H5F_t *f,
+ H5O_t *oh,
+ unsigned chunkno,
+ size_t size,
+ unsigned * msg_idx)
{
- unsigned u;
- unsigned idx;
- size_t delta, old_size;
- size_t aligned_size = H5O_ALIGN(size);
- uint8_t *old_addr;
- unsigned ret_value;
+ unsigned u;
+ unsigned idx;
+ unsigned i;
+ size_t delta, old_size;
+ size_t aligned_size = H5O_ALIGN(size);
+ uint8_t *old_addr;
+ herr_t result;
+ htri_t tri_result;
+ htri_t ret_value; /* return value */
+ hbool_t cont_updated;
FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_extend_chunk);
/* check args */
- assert(oh);
- assert(chunkno < oh->nchunks);
- assert(size > 0);
+ HDassert( f != NULL );
+ HDassert( oh != NULL );
+ HDassert( chunkno < oh->nchunks );
+ HDassert( size > 0 );
+ HDassert( msg_idx != NULL );
- if (H5F_addr_defined(oh->chunk[chunkno].addr))
- HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "chunk is on disk");
+ if ( !H5F_addr_defined(oh->chunk[chunkno].addr) ) {
+
+ HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, FAIL, "chunk isn't on disk");
+ }
+
+ /* Test to see if the specified chunk ends with a null message. If
+ * it does, try to extend the chunk and (thereby) the null message.
+ * If successful, return the index of the null message in *msg_idx.
+ */
+ for ( idx = 0; idx < oh->nmesgs; idx++ )
+ {
+ if (oh->mesg[idx].chunkno==chunkno) {
- /* try to extend a null message */
- for (idx=0; idx<oh->nmesgs; idx++) {
- if (oh->mesg[idx].chunkno==chunkno) {
if (H5O_NULL_ID == oh->mesg[idx].type->id &&
(oh->mesg[idx].raw + oh->mesg[idx].raw_size ==
oh->chunk[chunkno].image + oh->chunk[chunkno].size)) {
- delta = MAX (H5O_MIN_SIZE, aligned_size - oh->mesg[idx].raw_size);
- assert (delta=H5O_ALIGN (delta));
+ delta = MAX(H5O_MIN_SIZE,
+ aligned_size - oh->mesg[idx].raw_size);
+
+ HDassert( delta == H5O_ALIGN(delta) );
+
+ /* determine whether the chunk can be extended */
+ tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR,
+ oh->chunk[chunkno].addr,
+ (hsize_t)(oh->chunk[chunkno].size),
+ (hsize_t)delta);
+
+ if ( tri_result == FALSE ) { /* can't extend -- we are done */
+
+ HGOTO_DONE(FALSE);
+
+ } else if ( tri_result != TRUE ) { /* system error */
- /* Reserve space in the file to hold the increased chunk size */
- if( H5MF_reserve(f, (hsize_t)delta) < 0 )
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file");
+ HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \
+ "H5MF_can_extend() failed");
+ }
+
+ /* if we get this far, we should be able to extend the chunk */
+ result = H5MF_extend(f, H5FD_MEM_OHDR,
+ oh->chunk[chunkno].addr,
+ (hsize_t)(oh->chunk[chunkno].size),
+ (hsize_t)delta);
+
+ if ( result < 0 ) { /* system error */
+
+ HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \
+ "H5MF_extend() failed.");
+ }
+
+
+ /* chunk size has been increased -- tidy up */
oh->mesg[idx].dirty = TRUE;
oh->mesg[idx].raw_size += delta;
@@ -2888,12 +2948,19 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size)
old_addr = oh->chunk[chunkno].image;
 /* Be careful not to introduce garbage */
- oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr,
- (oh->chunk[chunkno].size + delta));
- if (NULL==oh->chunk[chunkno].image)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
+ oh->chunk[chunkno].image =
+ H5FL_BLK_REALLOC(chunk_image,old_addr,
+ (oh->chunk[chunkno].size + delta));
+
+ if ( NULL == oh->chunk[chunkno].image ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "memory allocation failed");
+ }
+
HDmemset(oh->chunk[chunkno].image + oh->chunk[chunkno].size,
0, delta);
+
oh->chunk[chunkno].size += delta;
/* adjust raw addresses for messages of this chunk */
@@ -2904,35 +2971,106 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size)
(oh->mesg[u].raw - old_addr);
}
}
- HGOTO_DONE(idx);
+
+ /* adjust the continuation message pointing to this chunk
+ * for the increase in chunk size.
+ *
+ * As best I understand the code, it is not necessarily an
+ * error if there is no continuation message pointing to a
+ * chunk -- for example, chunk 0 seems to be pointed to by
+ * the object header.
+ */
+ cont_updated = FALSE;
+ for ( i = 0; i < oh->nmesgs; i++ )
+ {
+ if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) &&
+ ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno
+ == chunkno ) ) {
+
+ HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size
+ == oh->chunk[chunkno].size - delta );
+
+ ((H5O_cont_t *)(oh->mesg[i].native))->size =
+ oh->chunk[chunkno].size;
+
+ cont_updated = TRUE;
+
+ /* there should be at most one continuation message
+ * pointing to this chunk, so we can quit when we find
+ * and update it.
+ */
+ break;
+ }
+ }
+ HDassert( ( chunkno == 0 ) || ( cont_updated ) );
+
+ *msg_idx = idx;
+ HGOTO_DONE(TRUE);
}
} /* end if */
- }
+ } /* end for */
- /* Reserve space in the file */
+ /* if we get this far, the specified chunk does not end in a null message.
+ * Attempt to extend the chunk, and if successful, fill the new section
+ * of the chunk with a null message.
+ */
+
+ /* compute space needed in the file */
delta = MAX(H5O_MIN_SIZE, aligned_size+H5O_SIZEOF_MSGHDR(f));
delta = H5O_ALIGN(delta);
- if( H5MF_reserve(f, (hsize_t)delta) < 0 )
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file");
+ /* determine whether the chunk can be extended */
+ tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR,
+ oh->chunk[chunkno].addr,
+ (hsize_t)(oh->chunk[chunkno].size),
+ (hsize_t)delta);
+
+ if ( tri_result == FALSE ) { /* can't extend -- we are done */
+
+ HGOTO_DONE(FALSE);
+
+ } else if ( tri_result != TRUE ) { /* system error */
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, "H5MF_can_extend() failed");
+ }
+
+ /* if we get this far, we should be able to extend the chunk */
+ result = H5MF_extend(f, H5FD_MEM_OHDR,
+ oh->chunk[chunkno].addr,
+ (hsize_t)(oh->chunk[chunkno].size),
+ (hsize_t)delta);
+
+ if ( result < 0 ) { /* system error */
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, \
+ "H5MF_extend() failed");
+ }
+
/* create a new null message */
- if (oh->nmesgs >= oh->alloc_nmesgs) {
+ if ( oh->nmesgs >= oh->alloc_nmesgs ) {
+
unsigned na = oh->alloc_nmesgs + H5O_NMESGS;
+
H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na);
- if (NULL==x)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
+ if ( NULL == x ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed");
+ }
oh->alloc_nmesgs = na;
oh->mesg = x;
}
+
idx = oh->nmesgs++;
+
oh->mesg[idx].type = H5O_NULL;
oh->mesg[idx].dirty = TRUE;
oh->mesg[idx].native = NULL;
oh->mesg[idx].raw = oh->chunk[chunkno].image +
- oh->chunk[chunkno].size +
- H5O_SIZEOF_MSGHDR(f);
+ oh->chunk[chunkno].size +
+ H5O_SIZEOF_MSGHDR(f);
oh->mesg[idx].raw_size = delta - H5O_SIZEOF_MSGHDR(f);
oh->mesg[idx].chunkno = chunkno;
@@ -2940,74 +3078,125 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size)
old_size = oh->chunk[chunkno].size;
oh->chunk[chunkno].size += delta;
oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr,
- oh->chunk[chunkno].size);
- if (NULL==oh->chunk[chunkno].image)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
+ oh->chunk[chunkno].size);
+ if (NULL==oh->chunk[chunkno].image) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed");
+ }
+
HDmemset(oh->chunk[chunkno].image+old_size, 0,
oh->chunk[chunkno].size - old_size);
/* adjust raw addresses for messages of this chunk */
if (old_addr != oh->chunk[chunkno].image) {
- for (u = 0; u < oh->nmesgs; u++) {
- if (oh->mesg[u].chunkno == chunkno)
- oh->mesg[u].raw = oh->chunk[chunkno].image +
- (oh->mesg[u].raw - old_addr);
- }
+ for (u = 0; u < oh->nmesgs; u++) {
+ if (oh->mesg[u].chunkno == chunkno)
+ oh->mesg[u].raw = oh->chunk[chunkno].image +
+ (oh->mesg[u].raw - old_addr);
+ }
+ }
+
+ /* adjust the continuation message pointing to this chunk for the
+ * increase in chunk size.
+ *
+ * As best I understand the code, it is not necessarily an error
+ * if there is no continuation message pointing to a chunk -- for
+ * example, chunk 0 seems to be pointed to by the object header.
+ */
+ cont_updated = FALSE;
+ for ( i = 0; i < oh->nmesgs; i++ )
+ {
+ if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) &&
+ ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno == chunkno ) ) {
+
+ HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size ==
+ oh->chunk[chunkno].size - delta );
+
+ ((H5O_cont_t *)(oh->mesg[i].native))->size =
+ oh->chunk[chunkno].size;
+
+ cont_updated = TRUE;
+
+ /* there should be at most one continuation message
+ * pointing to this chunk, so we can quit when we find
+ * and update it.
+ */
+ break;
+ }
}
+ HDassert( ( chunkno == 0 ) || ( cont_updated ) );
/* Set return value */
- ret_value=idx;
+ *msg_idx = idx;
+ ret_value = TRUE;
done:
+
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5O_alloc_extend_chunk() */
/*-------------------------------------------------------------------------
- * Function: H5O_alloc_new_chunk
+ * Function: H5O_alloc_new_chunk
*
- * Purpose: Allocates a new chunk for the object header but doen't
- * give the new chunk a file address yet. One of the other
- * chunks will get an object continuation message. If there
- * isn't room in any other chunk for the object continuation
- * message, then some message from another chunk is moved into
- * this chunk to make room.
+ * Purpose: Allocates a new chunk for the object header, including
+ * file space.
*
- * SIZE need not be aligned.
+ * One of the other chunks will get an object continuation
+ * message. If there isn't room in any other chunk for the
+ * object continuation message, then some message from
+ * another chunk is moved into this chunk to make room.
*
- * Return: Success: Index number of the null message for the
- * new chunk. The null message will be at
- * least SIZE bytes not counting the message
- * ID or size fields.
+ * SIZE need not be aligned.
*
- * Failure: Negative
+ * Return: Success: Index number of the null message for the
+ * new chunk. The null message will be at
+ * least SIZE bytes not counting the message
+ * ID or size fields.
*
- * Programmer: Robb Matzke
- * matzke@llnl.gov
- * Aug 7 1997
+ * Failure: Negative
+ *
+ * Programmer: Robb Matzke
+ * matzke@llnl.gov
+ * Aug 7 1997
*
* Modifications:
*
+ * John Mainzer, 8/17/05
+ * Reworked function to allocate file space immediately,
+ * instead of just allocating core space (as it used to).
+ * This change was necessary, as we were allocating file
+ * space on metadata cache eviction, which need not be
+ * synchronized across all processes. As a result,
+ * different processes were allocating different file
+ * locations to the same chunk.
+ *
*-------------------------------------------------------------------------
*/
static unsigned
-H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
+H5O_alloc_new_chunk(H5F_t *f,
+ hid_t dxpl_id,
+ H5O_t *oh,
+ size_t size)
{
- size_t cont_size; /*continuation message size */
- int found_null = (-1); /*best fit null message */
- int found_other = (-1); /*best fit other message */
- unsigned idx; /*message number */
- uint8_t *p = NULL; /*ptr into new chunk */
- H5O_cont_t *cont = NULL; /*native continuation message */
- int chunkno;
- unsigned u;
- unsigned ret_value; /*return value */
+ size_t cont_size; /*continuation message size */
+ int found_null = (-1); /*best fit null message */
+ int found_other = (-1); /*best fit other message */
+ unsigned idx; /*message number */
+ uint8_t *p = NULL; /*ptr into new chunk */
+ H5O_cont_t *cont = NULL; /*native continuation message */
+ int chunkno;
+ unsigned u;
+ unsigned ret_value; /*return value */
+ haddr_t new_chunk_addr;
FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_new_chunk);
/* check args */
- assert (oh);
- assert (size > 0);
+ HDassert (oh);
+ HDassert (size > 0);
size = H5O_ALIGN(size);
/*
@@ -3018,23 +3207,23 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
*/
cont_size = H5O_ALIGN (H5F_SIZEOF_ADDR(f) + H5F_SIZEOF_SIZE(f));
for (u=0; u<oh->nmesgs; u++) {
- if (H5O_NULL_ID == oh->mesg[u].type->id) {
- if (cont_size == oh->mesg[u].raw_size) {
- found_null = u;
- break;
- } else if (oh->mesg[u].raw_size >= cont_size &&
- (found_null < 0 ||
- (oh->mesg[u].raw_size <
- oh->mesg[found_null].raw_size))) {
- found_null = u;
- }
- } else if (H5O_CONT_ID == oh->mesg[u].type->id) {
- /*don't consider continuation messages */
- } else if (oh->mesg[u].raw_size >= cont_size &&
- (found_other < 0 ||
- oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) {
- found_other = u;
- }
+ if (H5O_NULL_ID == oh->mesg[u].type->id) {
+ if (cont_size == oh->mesg[u].raw_size) {
+ found_null = u;
+ break;
+ } else if (oh->mesg[u].raw_size >= cont_size &&
+ (found_null < 0 ||
+ (oh->mesg[u].raw_size <
+ oh->mesg[found_null].raw_size))) {
+ found_null = u;
+ }
+ } else if (H5O_CONT_ID == oh->mesg[u].type->id) {
+ /*don't consider continuation messages */
+ } else if (oh->mesg[u].raw_size >= cont_size &&
+ (found_other < 0 ||
+ oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) {
+ found_other = u;
+ }
}
assert(found_null >= 0 || found_other >= 0);
@@ -3043,36 +3232,47 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
* message, then make sure the new chunk has enough room for that
* other message.
*/
- if (found_null < 0)
- size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
+ if ( found_null < 0 ) {
+
+ size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
+ }
/*
* The total chunk size must include the requested space plus enough
- * for the message header. This must be at least some minimum and a
+ * for the message header. This must be at least some minimum and a
* multiple of the alignment size.
*/
size = MAX(H5O_MIN_SIZE, size + H5O_SIZEOF_MSGHDR(f));
assert (size == H5O_ALIGN (size));
- /* Reserve space in the file to hold the new chunk */
- if( H5MF_reserve(f, (hsize_t)size) < 0 )
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file for new chunk");
+ /* allocate space in file to hold the new chunk */
+ new_chunk_addr = H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)size);
+ if ( HADDR_UNDEF == new_chunk_addr ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "unable to allocate space for new chunk");
+ }
/*
- * Create the new chunk without giving it a file address.
+ * Create the new chunk giving it a file address.
*/
- if (oh->nchunks >= oh->alloc_nchunks) {
+ if ( oh->nchunks >= oh->alloc_nchunks ) {
+
unsigned na = oh->alloc_nchunks + H5O_NCHUNKS;
H5O_chunk_t *x = H5FL_SEQ_REALLOC (H5O_chunk_t, oh->chunk, (size_t)na);
- if (!x)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
+ if ( !x ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "memory allocation failed");
+ }
oh->alloc_nchunks = na;
oh->chunk = x;
}
+
chunkno = oh->nchunks++;
oh->chunk[chunkno].dirty = TRUE;
- oh->chunk[chunkno].addr = HADDR_UNDEF;
+ oh->chunk[chunkno].addr = new_chunk_addr;
oh->chunk[chunkno].size = size;
if (NULL==(oh->chunk[chunkno].image = p = H5FL_BLK_CALLOC(chunk_image,size)))
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
@@ -3086,35 +3286,40 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
unsigned na = oh->alloc_nmesgs + MAX (H5O_NMESGS, 3);
H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na);
- if (!x)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
+ if ( !x ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "memory allocation failed");
+ }
oh->alloc_nmesgs = na;
oh->mesg = x;
/* Set new object header info to zeros */
HDmemset(&oh->mesg[old_alloc], 0,
- (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t));
+ (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t));
}
/*
* Describe the messages of the new chunk.
*/
if (found_null < 0) {
- found_null = u = oh->nmesgs++;
- oh->mesg[u].type = H5O_NULL;
- oh->mesg[u].dirty = TRUE;
- oh->mesg[u].native = NULL;
- oh->mesg[u].raw = oh->mesg[found_other].raw;
- oh->mesg[u].raw_size = oh->mesg[found_other].raw_size;
- oh->mesg[u].chunkno = oh->mesg[found_other].chunkno;
-
- oh->mesg[found_other].dirty = TRUE;
+ found_null = u = oh->nmesgs++;
+ oh->mesg[u].type = H5O_NULL;
+ oh->mesg[u].dirty = TRUE;
+ oh->mesg[u].native = NULL;
+ oh->mesg[u].raw = oh->mesg[found_other].raw;
+ oh->mesg[u].raw_size = oh->mesg[found_other].raw_size;
+ oh->mesg[u].chunkno = oh->mesg[found_other].chunkno;
+
+ oh->mesg[found_other].dirty = TRUE;
/* Copy the message to the new location */
- HDmemcpy(p+H5O_SIZEOF_MSGHDR(f),oh->mesg[found_other].raw,oh->mesg[found_other].raw_size);
- oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f);
- oh->mesg[found_other].chunkno = chunkno;
- p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
- size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
+ HDmemcpy(p + H5O_SIZEOF_MSGHDR(f),
+ oh->mesg[found_other].raw,
+ oh->mesg[found_other].raw_size);
+ oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f);
+ oh->mesg[found_other].chunkno = chunkno;
+ p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
+ size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size;
}
idx = oh->nmesgs++;
oh->mesg[idx].type = H5O_NULL;
@@ -3130,19 +3335,19 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
* two null messages.
*/
if (oh->mesg[found_null].raw_size > cont_size) {
- u = oh->nmesgs++;
- oh->mesg[u].type = H5O_NULL;
- oh->mesg[u].dirty = TRUE;
- oh->mesg[u].native = NULL;
- oh->mesg[u].raw = oh->mesg[found_null].raw +
- cont_size +
- H5O_SIZEOF_MSGHDR(f);
- oh->mesg[u].raw_size = oh->mesg[found_null].raw_size -
- (cont_size + H5O_SIZEOF_MSGHDR(f));
- oh->mesg[u].chunkno = oh->mesg[found_null].chunkno;
-
- oh->mesg[found_null].dirty = TRUE;
- oh->mesg[found_null].raw_size = cont_size;
+ u = oh->nmesgs++;
+ oh->mesg[u].type = H5O_NULL;
+ oh->mesg[u].dirty = TRUE;
+ oh->mesg[u].native = NULL;
+ oh->mesg[u].raw = oh->mesg[found_null].raw +
+ cont_size +
+ H5O_SIZEOF_MSGHDR(f);
+ oh->mesg[u].raw_size = oh->mesg[found_null].raw_size -
+ (cont_size + H5O_SIZEOF_MSGHDR(f));
+ oh->mesg[u].chunkno = oh->mesg[found_null].chunkno;
+
+ oh->mesg[found_null].dirty = TRUE;
+ oh->mesg[found_null].raw_size = cont_size;
}
/*
@@ -3150,10 +3355,13 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
*/
oh->mesg[found_null].type = H5O_CONT;
oh->mesg[found_null].dirty = TRUE;
- if (NULL==(cont = H5FL_MALLOC(H5O_cont_t)))
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
- cont->addr = HADDR_UNDEF;
- cont->size = 0;
+ if (NULL==(cont = H5FL_MALLOC(H5O_cont_t))) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "memory allocation failed");
+ }
+ cont->addr = oh->chunk[chunkno].addr;
+ cont->size = oh->chunk[chunkno].size;
cont->chunkno = chunkno;
oh->mesg[found_null].native = cont;
@@ -3162,59 +3370,75 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size)
done:
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5O_alloc_new_chunk() */
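
The hunk above replaces the old reserve-now, assign-address-at-eviction scheme with an immediate H5MF_alloc() call, so the continuation message can record a real address and size at chunk-creation time. A minimal standalone sketch of that allocate-at-creation pattern follows; chunk_t, file_t, file_alloc() and ADDR_UNDEF are hypothetical stand-ins, not HDF5 internals.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ADDR_UNDEF ((uint64_t)-1)

typedef struct { uint64_t addr; size_t size; } chunk_t;
typedef struct { uint64_t eof; } file_t;

/* Toy file-space allocator: hand out space at the current end of file. */
static uint64_t file_alloc(file_t *f, size_t size)
{
    uint64_t addr = f->eof;
    f->eof += size;
    return addr;
}

/* Old scheme: create the chunk with addr = ADDR_UNDEF and patch the real
 * address in at eviction time.  New scheme (sketched here): allocate the
 * file space up front, so the address is known before the chunk is written.
 */
static int chunk_create(file_t *f, chunk_t *c, size_t size)
{
    c->addr = file_alloc(f, size);
    if (c->addr == ADDR_UNDEF)
        return -1;                  /* allocation failed */
    c->size = size;
    return 0;
}

int main(void)
{
    file_t  f = { 0 };
    chunk_t c;

    if (chunk_create(&f, &c, 512) == 0)
        assert(c.addr != ADDR_UNDEF);   /* address is known immediately */
    return 0;
}
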
/*-------------------------------------------------------------------------
- * Function: H5O_alloc
+ * Function: H5O_alloc
*
- * Purpose: Allocate enough space in the object header for this message.
+ * Purpose: Allocate enough space in the object header for this message.
*
- * Return: Success: Index of message
+ * Return: Success: Index of message
*
- * Failure: Negative
+ * Failure: Negative
*
- * Programmer: Robb Matzke
- * matzke@llnl.gov
- * Aug 6 1997
+ * Programmer: Robb Matzke
+ * matzke@llnl.gov
+ * Aug 6 1997
*
* Modifications:
*
* John Mainzer, 6/7/05
* Modified function to use the new dirtied parameter to
- * H5AC_unprotect() instead of modfying the is_dirty field.
+ * H5AC_unprotect() instead of modifying the is_dirty field directly.
* In this case, that requires the addition of the oh_dirtied_ptr
* parameter to track whether *oh is dirty.
*
+ * John Mainzer, 8/19/05
+ * Reworked the function to allocate disk space immediately instead
+ * of waiting to cache eviction time. This is necessary since cache
+ * evictions need not be synchronized across the processes in the
+ * PHDF5 case.
+ *
+ * Note the use of revised versions of H5O_alloc_new_chunk() and
+ * H5O_alloc_extend_chunk().
+ *
*-------------------------------------------------------------------------
*/
static unsigned
-H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * oh_flags_ptr)
+H5O_alloc(H5F_t *f,
+ hid_t dxpl_id,
+ H5O_t *oh,
+ const H5O_class_t *type,
+ size_t size,
+ unsigned * oh_flags_ptr)
{
- unsigned idx;
+ unsigned idx = UFAIL;
H5O_mesg_t *msg; /* Pointer to newly allocated message */
- size_t aligned_size = H5O_ALIGN(size);
- unsigned ret_value; /* Return value */
+ size_t aligned_size = H5O_ALIGN(size);
+ htri_t tri_result;
+ unsigned ret_value; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5O_alloc);
/* check args */
- assert (oh);
- assert (type);
- assert (oh_flags_ptr);
+ HDassert (oh);
+ HDassert (type);
+ HDassert (oh_flags_ptr);
/* look for a null message which is large enough */
for (idx = 0; idx < oh->nmesgs; idx++) {
- if (H5O_NULL_ID == oh->mesg[idx].type->id &&
+ if (H5O_NULL_ID == oh->mesg[idx].type->id &&
oh->mesg[idx].raw_size >= aligned_size)
- break;
+ break;
}
#ifdef LATER
/*
* Perhaps if we join adjacent null messages we could make one
* large enough... we leave this as an exercise for future
- * programmers :-) This isn't a high priority because when an
+ * programmers :-) This isn't a high priority because when an
* object header is read from disk the null messages are combined
* anyway.
*/
@@ -3222,63 +3446,92 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned *
/* if we didn't find one, then allocate more header space */
if (idx >= oh->nmesgs) {
- unsigned chunkno;
+ unsigned chunkno;
- /*
- * Look for a chunk which hasn't had disk space allocated yet
- * since we can just increase the size of that chunk.
- */
- for (chunkno = 0; chunkno < oh->nchunks; chunkno++) {
- if ((idx = H5O_alloc_extend_chunk(f, oh, chunkno, size)) != UFAIL) {
- break;
- }
- H5E_clear_stack(NULL);
- }
+ /* check to see if we can extend one of the chunks. If we can,
+ * do so. Otherwise, we will have to allocate a new chunk.
+ *
+ * Note that in this new version of this function, all chunks
+ * must have file space allocated to them.
+ */
+ for ( chunkno = 0; chunkno < oh->nchunks; chunkno++ )
+ {
+ HDassert( H5F_addr_defined(oh->chunk[chunkno].addr) );
- /*
- * Create a new chunk
- */
- if (idx == UFAIL) {
- if ((idx = H5O_alloc_new_chunk(f, oh, size)) == UFAIL)
- HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "unable to create a new object header data chunk");
- }
+ tri_result = H5O_alloc_extend_chunk(f, oh, chunkno, size, &idx);
+
+ if ( tri_result == TRUE ) {
+
+ break;
+
+ } else if ( tri_result == FALSE ) {
+
+ idx = UFAIL;
+
+ } else {
+
+ HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, UFAIL, \
+ "H5O_alloc_extend_chunk failed unexpectedly");
+ }
+ }
+
+ /* if idx is still UFAIL, we were not able to extend a chunk.
+ * Create a new one.
+ */
+ if (idx == UFAIL) {
+
+ if ( (idx = H5O_alloc_new_chunk(f, dxpl_id, oh, size)) == UFAIL ) {
+
+ HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, \
+ "unable to create a new object header data chunk");
+ }
+ }
}
/* Set pointer to newly allocated message */
- msg=&oh->mesg[idx];
+ msg = &(oh->mesg[idx]);
/* do we need to split the null message? */
if (msg->raw_size > aligned_size) {
+
H5O_mesg_t *null_msg; /* Pointer to null message */
- size_t mesg_size = aligned_size+ H5O_SIZEOF_MSGHDR(f); /* Total size of newly allocated message */
- assert(msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f));
+ size_t mesg_size = aligned_size + H5O_SIZEOF_MSGHDR(f);
+ /* Total size of newly allocated message */
- if (oh->nmesgs >= oh->alloc_nmesgs) {
- int old_alloc=oh->alloc_nmesgs;
- unsigned na = oh->alloc_nmesgs + H5O_NMESGS;
- H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na);
+ HDassert( msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f) );
- if (!x)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed");
- oh->alloc_nmesgs = na;
- oh->mesg = x;
+ if (oh->nmesgs >= oh->alloc_nmesgs) {
+
+ int old_alloc=oh->alloc_nmesgs;
+ unsigned na = oh->alloc_nmesgs + H5O_NMESGS;
+ H5O_mesg_t *x = H5FL_SEQ_REALLOC(H5O_mesg_t, oh->mesg, (size_t)na);
- /* Set new object header info to zeros */
- HDmemset(&oh->mesg[old_alloc],0,
- (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t));
+ if (!x) {
- /* "Retarget" local 'msg' pointer into newly allocated array of messages */
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \
+ "memory allocation failed");
+ }
+ oh->alloc_nmesgs = na;
+ oh->mesg = x;
+
+ /* Set new object header info to zeros */
+ HDmemset(&oh->mesg[old_alloc],0,
+ (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t));
+
+ /* "Retarget" local 'msg' pointer into newly allocated array
+ * of messages
+ */
msg=&oh->mesg[idx];
- }
- null_msg=&oh->mesg[oh->nmesgs++];
- null_msg->type = H5O_NULL;
- null_msg->dirty = TRUE;
- null_msg->native = NULL;
- null_msg->raw = msg->raw + mesg_size;
- null_msg->raw_size = msg->raw_size - mesg_size;
- null_msg->chunkno = msg->chunkno;
- msg->raw_size = aligned_size;
+ }
+ null_msg = &(oh->mesg[oh->nmesgs++]);
+ null_msg->type = H5O_NULL;
+ null_msg->dirty = TRUE;
+ null_msg->native = NULL;
+ null_msg->raw = msg->raw + mesg_size;
+ null_msg->raw_size = msg->raw_size - mesg_size;
+ null_msg->chunkno = msg->chunkno;
+ msg->raw_size = aligned_size;
}
/* initialize the new message */
@@ -3289,11 +3542,12 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned *
*oh_flags_ptr |= H5AC__DIRTIED_FLAG;
/* Set return value */
- ret_value=idx;
+ ret_value = idx;
done:
FUNC_LEAVE_NOAPI(ret_value);
-}
+
+} /* H5O_alloc() */
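
The revised H5O_alloc() above first tries to extend an existing chunk and only allocates a new one when every extension attempt reports FALSE; the htri_t return of H5O_alloc_extend_chunk() distinguishes extended, cannot-extend, and error. A minimal sketch of that control flow follows, assuming hypothetical helpers try_extend_chunk() and alloc_new_chunk() in place of the real routines.

#include <stddef.h>
#include <stdio.h>

#define IDX_FAIL ((unsigned)-1)

/* Tri-state result, mirroring the htri_t convention used above:
 *   > 0  chunk was extended in place (TRUE)
 *   == 0 this chunk cannot be extended (FALSE)
 *   < 0  unexpected error
 */
static int try_extend_chunk(unsigned chunkno, size_t size, unsigned *idx_out)
{
    (void)size;
    if (chunkno == 2) {             /* pretend only chunk 2 has room to grow */
        *idx_out = 7;               /* index of the message gained by extending */
        return 1;
    }
    return 0;
}

static unsigned alloc_new_chunk(size_t size)
{
    (void)size;
    return 9;                       /* index of the null message in the new chunk */
}

static unsigned alloc_message(unsigned nchunks, size_t size)
{
    unsigned idx = IDX_FAIL;

    /* First try to extend an existing chunk... */
    for (unsigned chunkno = 0; chunkno < nchunks; chunkno++) {
        int rc = try_extend_chunk(chunkno, size, &idx);
        if (rc > 0)
            break;                  /* extended in place */
        if (rc < 0)
            return IDX_FAIL;        /* propagate the error */
        idx = IDX_FAIL;             /* FALSE: keep looking */
    }

    /* ...and only create a brand-new chunk if nothing could be extended. */
    if (idx == IDX_FAIL)
        idx = alloc_new_chunk(size);

    return idx;
}

int main(void)
{
    printf("message index: %u\n", alloc_message(4, 64));
    return 0;
}
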
#ifdef NOT_YET
@@ -3956,7 +4210,7 @@ H5O_iterate_real(const H5G_entry_t *ent, const H5O_class_t *type, H5AC_protect_t
if(oh_flags & H5AC__DIRTIED_FLAG) {
/* Shouldn't be able to modify object header if we don't have write access */
HDassert(prot == H5AC_WRITE);
- H5O_touch_oh(ent->file, oh, FALSE, &oh_flags);
+ H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags);
} /* end if */
done:
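
The call-site change above, together with the prototype updates to H5Oprivate.h below, threads a dxpl_id through routines such as H5O_touch_oh() so that they have a data-transfer property list available when file space must be allocated during the call. The sketch below shows only the general pattern of passing such a context id down a call chain; ctx_id_t, alloc_space() and touch_header() are hypothetical stand-ins, not the HDF5 API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t ctx_id_t;           /* stands in for hid_t dxpl_id */

/* Toy allocator; a real one would consult the transfer property list to
 * decide how to perform the (possibly collective) file-space allocation.
 */
static uint64_t alloc_space(ctx_id_t dxpl_id, size_t size)
{
    static uint64_t eof = 0;
    uint64_t addr = eof;

    (void)dxpl_id;
    eof += size;
    return addr;
}

/* Before: touch_header(header, ...) had no way to allocate file space.
 * After:  passing dxpl_id down the call chain lets it do so directly.
 */
static void touch_header(ctx_id_t dxpl_id, size_t needed)
{
    uint64_t addr = alloc_space(dxpl_id, needed);

    printf("allocated %zu bytes at offset %llu\n",
           needed, (unsigned long long)addr);
}

int main(void)
{
    ctx_id_t dxpl_id = 0;           /* e.g. a default property-list id */

    touch_header(dxpl_id, 256);
    return 0;
}
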
diff --git a/src/H5Oprivate.h b/src/H5Oprivate.h
index 8a9dd7d..e3043af 100644
--- a/src/H5Oprivate.h
+++ b/src/H5Oprivate.h
@@ -268,11 +268,11 @@ H5_DLL herr_t H5O_unprotect(H5G_entry_t *ent, struct H5O_t *oh, hid_t dxpl_id,
H5_DLL int H5O_append(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, unsigned type_id,
unsigned flags, const void *mesg, unsigned * oh_flags_ptr);
H5_DLL herr_t H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id);
-H5_DLL herr_t H5O_touch_oh(H5F_t *f, struct H5O_t *oh, hbool_t force,
- unsigned * oh_flags_ptr);
+H5_DLL herr_t H5O_touch_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh,
+ hbool_t force, unsigned * oh_flags_ptr);
#ifdef H5O_ENABLE_BOGUS
H5_DLL herr_t H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id);
-H5_DLL herr_t H5O_bogus_oh(H5F_t *f, struct H5O_t *oh,
+H5_DLL herr_t H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh,
unsigned * oh_flags_ptr);
#endif /* H5O_ENABLE_BOGUS */
H5_DLL herr_t H5O_remove(H5G_entry_t *ent, unsigned type_id, int sequence,