summaryrefslogtreecommitdiffstats
path: root/src/H5AC.c
diff options
context:
space:
mode:
authorJohn Mainzer <mainzer@hdfgroup.org>2005-09-27 05:20:11 (GMT)
committerJohn Mainzer <mainzer@hdfgroup.org>2005-09-27 05:20:11 (GMT)
commitc100b0bf2639c03579ce1b2c4013b36c6f40350b (patch)
tree5c3b2834af1206110243886f357857900a8b108e /src/H5AC.c
parentf9fc749ca218a878dbea4022ba1c2fb527f7822c (diff)
downloadhdf5-c100b0bf2639c03579ce1b2c4013b36c6f40350b.zip
hdf5-c100b0bf2639c03579ce1b2c4013b36c6f40350b.tar.gz
hdf5-c100b0bf2639c03579ce1b2c4013b36c6f40350b.tar.bz2
[svn-r11470] Purpose:
Repair synchronization bug in the metadata cache in PHDF5 Also repair numerous other bugs that surfaced in testing the bug fix. Description: While operations modifying metadata must be collective, we allow independant reads. This allows metadata caches on different processes to adjust to different sizes, and to place the entries on their dirty lists in different orders. Since only process 0 actually writes metadata to disk (all other processes thought they did, but the writes were discarded on the theory that they had to be collective), this made it possible for another process to modify metadata, flush it, and then read it back in in its original form (pre-modification) form. The possibilities for file corruption should be obvious. Solution: Make the policy that only process 0 can write to file explicit, and visible to the metadata caches. Thus only process 0 may flush dirty entries -- all other caches must retain dirty entries until they are informed by process 0 that the entries are clean. Synchronization is handled by counting the bytes of dirty cache entries created, and then synching up between the caches whenever the sum exceeds an (eventually user specified) limit. Dirty metadata creation is consistent across all processes because all operations modifying metadata must be collective. This change uncovered may bugs which are repaired in this checkin. It also required modification of H5HL and H5O to allocate file space on insertion rather than on flush from cache. Platforms tested: H5committest, heping(parallel & serial) Misc. update:
Diffstat (limited to 'src/H5AC.c')
-rw-r--r--src/H5AC.c2106
1 files changed, 2067 insertions, 39 deletions
diff --git a/src/H5AC.c b/src/H5AC.c
index d4b70d2..eff9137 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -42,18 +42,24 @@
*-------------------------------------------------------------------------
*/
+#define H5C_PACKAGE /*suppress error about including H5Cpkg */
#define H5F_PACKAGE /*suppress error about including H5Fpkg */
/* Interface initialization */
#define H5_INTERFACE_INIT_FUNC H5AC_init_interface
+#ifdef H5_HAVE_PARALLEL
+#include <mpi.h>
+#endif /* H5_HAVE_PARALLEL */
#include "H5private.h" /* Generic Functions */
#include "H5ACprivate.h" /* Metadata cache */
+#include "H5Cpkg.h" /* Cache */
#include "H5Dprivate.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
#include "H5FDprivate.h" /* File drivers */
+#include "H5FLprivate.h" /* Free Lists */
#include "H5Iprivate.h" /* IDs */
#include "H5Pprivate.h" /* Property lists */
@@ -62,6 +68,299 @@
#include "H5FPprivate.h" /* Flexible PHDF5 */
#endif /* H5_HAVE_FPHDF5 */
+#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0
+
+/****************************************************************************
+ *
+ * structure H5AC_aux_t
+ *
+ * While H5AC has become a wrapper for the cache implemented in H5C.c, there
+ * are some features of the metadata cache that are specific to it, and which
+ * therefore do not belong in the more generic H5C cache code.
+ *
+ * In particular, there is the matter of synchronizing writes from the
+ * metadata cache to disk in the PHDF5 case.
+ *
+ * Prior to this update, the presumption was that all metadata caches would
+ * write the same data at the same time since all operations modifying
+ * metadata must be performed collectively. Given this assumption, it was
+ * safe to allow only the writes from process 0 to actually make it to disk,
+ * while metadata writes from all other processes were discarded.
+ *
+ * Unfortunately, this presumption is in error as operations that read
+ * metadata need not be collective, but can change the location of dirty
+ * entries in the metadata cache LRU lists. This can result in the same
+ * metadata write operation triggering writes from the metadata caches on
+ * some processes, but not all (causing a hang), or in different sets of
+ * entries being written from different caches (potentially resulting in
+ * metadata corruption in the file).
+ *
+ * To deal with this issue, I decided to apply a paradigm shift to the way
+ * metadata is written to disk.
+ *
+ * With this set of changes, only the metadata cache on process 0 is able
+ * to write metadata to disk, although metadata caches on all other
+ * processes can read metadata from disk as before.
+ *
+ * To keep all the other caches from getting plugged up with dirty metadata,
+ * process 0 periodically broadcasts a list of entries that it has flushed
+ * since that last notice, and which are currently clean. The other caches
+ * mark these entries as clean as well, which allows them to evict the
+ * entries as needed.
+ *
+ * One obvious problem in this approach is synchronizing the broadcasts
+ * and receptions, as different caches may see different amounts of
+ * activity.
+ *
+ * The current solution is for the caches to track the number of bytes
+ * of newly generated dirty metadata, and to broadcast and receive
+ * whenever this value exceeds some user specified threshold.
+ *
+ * Maintaining this count is easy for all processes not on process 0 --
+ * all that is necessary is to add the size of the entry to the total
+ * whenever there is an insertion, a rename of a previously clean entry,
+ * or whever a previously clean entry is marked dirty in an unprotect.
+ *
+ * On process 0, we have to be careful not to count dirty bytes twice.
+ * If an entry is marked dirty, flushed, and marked dirty again, all
+ * within a single reporting period, it only th first marking should
+ * be added to the dirty bytes generated tally, as that is all that
+ * the other processes will see.
+ *
+ * At present, this structure exists to maintain the fields needed to
+ * implement the above scheme, and thus is only used in the parallel
+ * case. However, other uses may arise in the future.
+ *
+ * Instance of this structure are associated with metadata caches via
+ * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is
+ * responsible for allocating, maintaining, and discarding instances
+ * of H5AC_aux_t.
+ *
+ * The remainder of this header comments documents the individual fields
+ * of the structure.
+ *
+ * JRM - 6/27/05
+ *
+ * magic: Unsigned 32 bit integer always set to
+ * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate
+ * pointers to instances of H5AC_aux_t.
+ *
+ * mpi_comm: MPI communicator associated with the file for which the
+ * cache has been created.
+ *
+ * mpi_rank: MPI rank of this process within mpi_comm.
+ *
+ * mpi_size: Number of processes in mpi_comm.
+ *
+ * write_permitted: Boolean flag used to control whether the cache
+ * is permitted to write to file.
+ *
+ * dirty_bytes_threshold: Integer field containing the dirty bytes
+ * generation threashold. Whenever dirty byte creation
+ * exceeds this value, the metadata cache on process 0
+ * broadcasts a list of the entries it has flushed since
+ * the last broadcast (or since the beginning of execution)
+ * and which are currently clean (if they are still in the
+ * cache)
+ *
+ * Similarly, metadata caches on processes other than process
+ * 0 will attempt to receive a list of clean entries whenever
+ * the threshold is exceeded.
+ *
+ * dirty_bytes: Integer field containing the number of bytes of dirty
+ * metadata generated since the beginning of the computation,
+ * or (more typically) since the last clean entries list
+ * broadcast. This field is reset to zero after each such
+ * broadcast.
+ *
+ * dirty_bytes_propagations: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times the cleaned list
+ * has been propagated from process 0 to the other
+ * processes.
+ *
+ * unprotect_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via unprotect operations since the last time the cleaned
+ * list was propagated.
+ *
+ * unprotect_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via unprotect operations since the last time
+ * the cleaned list was propagated.
+ *
+ * insert_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via insert operations since the last time the cleaned
+ * list was propagated.
+ *
+ * insert_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via insert operations since the last time
+ * the cleaned list was propagated.
+ *
+ * rename_dirty_bytes: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of dirty bytes created
+ * via rename operations since the last time the cleaned
+ * list was propagated.
+ *
+ * rename_dirty_bytes_updates: This field only exists when the
+ * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE.
+ *
+ * It is used to track the number of times dirty bytes have
+ * been created via rename operations since the last time
+ * the cleaned list was propagated.
+ *
+ * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
+ * of entries that have been dirtied since the last time they
+ * were listed in a clean entries broadcast. This list is
+ * only maintained by the metadata cache on process 0 -- it
+ * it used to maintain a view of the dirty entries as seen
+ * by the other caches, so as to keep the dirty bytes count
+ * in synchronization with them.
+ *
+ * Thus on process 0, the dirty_bytes count is incremented
+ * only if either
+ *
+ * 1) an entry is inserted in the metadata cache, or
+ *
+ * 2) a previously clean entry is renamed, and it does not
+ * already appear in the dirty entry list, or
+ *
+ * 3) a previously clean entry is unprotected with the
+ * dirtied flag set and the entry does not already appear
+ * in the dirty entry list.
+ *
+ * Entries are added to the dirty entry list whever they cause
+ * the dirty bytes count to be increased. They are removed
+ * when they appear in a clean entries broadcast. Note that
+ * renames must be reflected in the dirty entry list.
+ *
+ * To reitterate, this field is only used on process 0 -- it
+ * should be NULL on all other processes.
+ *
+ * d_slist_len: Integer field containing the number of entries in the
+ * dirty entry list. This field should always contain the
+ * value 0 on all processes other than process 0. It exists
+ * primarily for sanity checking.
+ *
+ * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list
+ * of entries that were dirty, have been flushed
+ * to disk since the last clean entries broadcast, and are
+ * still clean. Since only process 0 can write to disk, this
+ * list only exists on process 0.
+ *
+ * In essence, this slist is used to assemble the contents of
+ * the next clean entries broadcast. The list emptied after
+ * each broadcast.
+ *
+ * c_slist_len: Integer field containing the number of entries in the clean
+ * entries list (*c_slist_ptr). This field should always
+ * contain the value 0 on all processes other than process 0.
+ * It exists primarily for sanity checking.
+ *
+ ****************************************************************************/
+
+#ifdef H5_HAVE_PARALLEL
+
+#define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01
+
+typedef struct H5AC_aux_t
+{
+ uint32_t magic;
+
+ MPI_Comm mpi_comm;
+
+ int mpi_rank;
+
+ int mpi_size;
+
+ hbool_t write_permitted;
+
+ int32_t dirty_bytes_threshold;
+
+ int32_t dirty_bytes;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+
+ int32_t dirty_bytes_propagations;
+
+ int32_t unprotect_dirty_bytes;
+ int32_t unprotect_dirty_bytes_updates;
+
+ int32_t insert_dirty_bytes;
+ int32_t insert_dirty_bytes_updates;
+
+ int32_t rename_dirty_bytes;
+ int32_t rename_dirty_bytes_updates;
+
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ H5SL_t * d_slist_ptr;
+
+ int32_t d_slist_len;
+
+ H5SL_t * c_slist_ptr;
+
+ int32_t c_slist_len;
+
+} H5AC_aux_t; /* struct H5AC_aux_t */
+
+/* Declare a free list to manage the H5AC_aux_t struct */
+H5FL_DEFINE_STATIC(H5AC_aux_t);
+
+#endif /* H5_HAVE_PARALLEL */
+
+/****************************************************************************
+ *
+ * structure H5AC_slist_entry_t
+ *
+ * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t
+ * and the cleaned entry list maintained via the c_slist_ptr field of
+ * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned
+ * entries. Unfortunately, the slist code makes us define a dynamically
+ * allocated structure to store these offsets in. This structure serves
+ * that purpose. Its fields are as follows:
+ *
+ * magic: Unsigned 32 bit integer always set to
+ * H5AC__H5AC_SLIST_ENTRY_T_MAGIC. This field is used to
+ * validate pointers to instances of H5AC_slist_entry_t.
+ *
+ * addr: file offset of a metadata entry. Entries are added to this
+ * list (if they aren't there already) when they are marked
+ * dirty in an unprotect, inserted, or renamed. They are
+ * removed when they appear in a clean entries broadcast.
+ *
+ ****************************************************************************/
+
+#ifdef H5_HAVE_PARALLEL
+
+#define H5AC__H5AC_SLIST_ENTRY_T_MAGIC 0x00D0A02
+
+typedef struct H5AC_slist_entry_t
+{
+ uint32_t magic;
+
+ haddr_t addr;
+} H5AC_slist_entry_t;
+
+/* Declare a free list to manage the H5AC_slist_entry_t struct */
+H5FL_DEFINE_STATIC(H5AC_slist_entry_t);
+
+#endif /* H5_HAVE_PARALLEL */
+
+
/*
* Private file-scope variables.
*/
@@ -90,6 +389,55 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f,
hid_t dxpl_id,
hbool_t * write_permitted_ptr);
+#ifdef H5_HAVE_PARALLEL
+static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr);
+
+static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ unsigned int flags);
+
+static herr_t H5AC_log_dirtied_entry(H5AC_t * cache_ptr,
+ H5C_cache_entry_t * entry_ptr,
+ haddr_t addr,
+ hbool_t size_changed,
+ size_t new_size);
+
+static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id);
+
+#if 0 /* this is useful debugging code -- JRM */
+static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id);
+#endif /* JRM */
+
+static herr_t H5AC_log_inserted_entry(H5F_t * f,
+ H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ const H5AC_class_t * type,
+ haddr_t addr);
+
+static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr,
+ hbool_t do_barrier);
+
+static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5AC_t * cache_ptr);
+
+static herr_t H5AC_log_renamed_entry(H5AC_t * cache_ptr,
+ haddr_t old_addr,
+ haddr_t new_addr);
+#endif /* H5_HAVE_PARALLEL */
+
/*-------------------------------------------------------------------------
* Function: H5AC_init
@@ -351,6 +699,13 @@ H5AC_term_interface(void)
* through the function.
* JRM - 4/7/05
*
+ * Added code allocating and initializing the auxilary
+ * structure (an instance of H5AC_aux_t), and linking it
+ * to the instance of H5C_t created by H5C_create(). At
+ * present, the auxilary structure is only used in PHDF5.
+ *
+ * JRM - 6/28/05
+ *
*-------------------------------------------------------------------------
*/
@@ -376,6 +731,12 @@ H5AC_create(const H5F_t *f,
{
herr_t ret_value = SUCCEED; /* Return value */
herr_t result;
+#ifdef H5_HAVE_PARALLEL
+ MPI_Comm mpi_comm = MPI_COMM_NULL;
+ int mpi_rank = -1;
+ int mpi_size = -1;
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_create, FAIL)
@@ -390,15 +751,138 @@ H5AC_create(const H5F_t *f,
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration");
}
- /* The default max cache size and min clean size will frequently be
- * overwritten shortly by the subsequent set resize config call.
- * -- JRM
- */
- f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
- H5AC__DEFAULT_MIN_CLEAN_SIZE,
- (H5AC_NTYPES - 1),
- (const char **)H5AC_entry_type_names,
- H5AC_check_if_write_permitted);
+#ifdef H5_HAVE_PARALLEL
+ if ( IS_H5FD_MPI(f) ) {
+
+ if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \
+ "can't get MPI communicator")
+ }
+
+ if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank")
+ }
+
+ if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) {
+
+ HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size")
+ }
+
+ /* There is no point in setting up the auxilary structure if size
+ * is less than or equal to 1, as there will never be any processes
+ * to broadcast the clean lists to.
+ */
+ if ( mpi_size > 1 ) {
+
+ if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate H5AC auxilary structure.")
+
+ } else {
+
+ aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC;
+ aux_ptr->mpi_comm = mpi_comm;
+ aux_ptr->mpi_rank = mpi_rank;
+ aux_ptr->mpi_size = mpi_size;
+ aux_ptr->write_permitted = FALSE;
+ aux_ptr->dirty_bytes_threshold = 256 * 1024;
+ aux_ptr->dirty_bytes = 0;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations = 0;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ aux_ptr->d_slist_ptr = NULL;
+ aux_ptr->d_slist_len = 0;
+ aux_ptr->c_slist_ptr = NULL;
+ aux_ptr->c_slist_len = 0;
+ }
+
+ if ( mpi_rank == 0 ) {
+
+ aux_ptr->d_slist_ptr =
+ H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16);
+
+ if ( aux_ptr->d_slist_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
+ "can't create dirtied entry list.")
+ }
+
+ aux_ptr->c_slist_ptr =
+ H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16);
+
+ if ( aux_ptr->c_slist_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL,
+ "can't create cleaned entry list.")
+ }
+ }
+ }
+
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ H5AC_log_flushed_entry,
+ (void *)aux_ptr);
+
+ } else {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ NULL,
+ FALSE,
+#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */
+ H5AC_log_flushed_entry_dummy,
+#else /* JRM */
+ NULL,
+#endif /* JRM */
+ (void *)aux_ptr);
+ }
+ } else {
+
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ NULL,
+ NULL);
+ }
+ } else {
+#endif /* H5_HAVE_PARALLEL */
+ /* The default max cache size and min clean size will frequently be
+ * overwritten shortly by the subsequent set resize config call.
+ * -- JRM
+ */
+ f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE,
+ H5AC__DEFAULT_MIN_CLEAN_SIZE,
+ (H5AC_NTYPES - 1),
+ (const char **)H5AC_entry_type_names,
+ H5AC_check_if_write_permitted,
+ TRUE,
+ NULL,
+ NULL);
+#ifdef H5_HAVE_PARALLEL
+ }
+#endif /* H5_HAVE_PARALLEL */
if ( NULL == f->shared->cache ) {
@@ -416,6 +900,31 @@ H5AC_create(const H5F_t *f,
done:
+#ifdef H5_HAVE_PARALLEL
+
+ /* if there is a failure, try to tidy up the auxilary structure */
+
+ if ( ret_value != SUCCEED ) {
+
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->d_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->d_slist_ptr);
+ }
+
+ if ( aux_ptr->c_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->c_slist_ptr);
+ }
+
+ aux_ptr->magic = 0;
+ H5FL_FREE(H5AC_aux_t, aux_ptr);
+ aux_ptr = NULL;
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5AC_create() */
@@ -445,6 +954,10 @@ done:
*
* JRM - 6/7/04
*
+ * Added code to free the auxiliary structure and its
+ * associated slist if present.
+ * JRM - 6/28/05
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -452,12 +965,23 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
{
H5AC_t *cache = NULL;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_dest, FAIL)
assert(f);
assert(f->shared->cache);
cache = f->shared->cache;
+#ifdef H5_HAVE_PARALLEL
+ aux_ptr = cache->aux_ptr;
+
+ if ( aux_ptr != NULL ) {
+
+ HDassert ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ }
+#endif /* H5_HAVE_PARALLEL */
f->shared->cache = NULL;
@@ -466,6 +990,25 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "can't destroy cache")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( aux_ptr != NULL ) {
+
+ if ( aux_ptr->d_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->d_slist_ptr);
+ }
+
+ if ( aux_ptr->c_slist_ptr != NULL ) {
+
+ H5SL_close(aux_ptr->c_slist_ptr);
+ }
+
+ aux_ptr->magic = 0;
+ H5FL_FREE(H5AC_aux_t, aux_ptr);
+ aux_ptr = NULL;
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -529,28 +1072,103 @@ done:
*
* Complete re-write. See above for details. -- JRM 5/11/04
*
- * Abstracted the guts of the function to H5C_dest() in H5C.c,
- * and then re-wrote the function as a wrapper for H5C_dest().
+ * Abstracted the guts of the function to H5C_flush_cache()
+ * in H5C.c, and then re-wrote the function as a wrapper for
+ * H5C_flush_cache().
*
* JRM - 6/7/04
*
+ * JRM - 7/5/05
+ * Modified function as part of a fix for a cache coherency
+ * bug in PHDF5. See the header comments on the H5AC_aux_t
+ * structure for details.
+ *
*-------------------------------------------------------------------------
*/
herr_t
H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags)
{
- herr_t status;
- herr_t ret_value=SUCCEED; /* Return value */
+ herr_t status;
+ herr_t ret_value = SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+ int mpi_code;
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_ENTER_NOAPI(H5AC_flush, FAIL)
HDassert(f);
HDassert(f->shared->cache);
- status = H5C_flush_cache(f,
- dxpl_id,
- H5AC_noblock_dxpl_id,
- f->shared->cache,
+#ifdef H5_HAVE_PARALLEL
+ aux_ptr = f->shared->cache->aux_ptr;
+
+ if ( aux_ptr != NULL ) {
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ HDfprintf(stdout,
+ "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
+ (int)(aux_ptr->mpi_rank),
+ (int)(aux_ptr->unprotect_dirty_bytes),
+ (int)(aux_ptr->unprotect_dirty_bytes_updates),
+ (int)(aux_ptr->insert_dirty_bytes),
+ (int)(aux_ptr->insert_dirty_bytes_updates),
+ (int)(aux_ptr->rename_dirty_bytes),
+ (int)(aux_ptr->rename_dirty_bytes_updates));
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we start the flush. Hence the following
+ * barrier.
+ */
+ if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+ }
+
+ /* if the clear only flag is set, this flush will not involve any
+ * disk I/O. In such cases, it is not necessary to let process 0
+ * flush first.
+ */
+ if ( ( aux_ptr->mpi_rank == 0 ) &&
+ ( (flags & H5AC__FLUSH_CLEAR_ONLY_FLAG) != 0 ) ) {
+
+ unsigned init_flush_flags = H5AC__NO_FLAGS_SET;
+
+ if ( ( (flags & H5AC__FLUSH_MARKED_ENTRIES_FLAG) != 0 ) &&
+ ( (flags & H5AC__FLUSH_INVALIDATE_FLAG) == 0 ) ) {
+
+ init_flush_flags |= H5AC__FLUSH_MARKED_ENTRIES_FLAG;
+ }
+
+ aux_ptr->write_permitted = TRUE;
+
+ status = H5C_flush_cache(f,
+ H5AC_noblock_dxpl_id,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ init_flush_flags);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if ( status < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
+ }
+ } /* end if ( aux_ptr->mpi_rank == 0 ) */
+
+ status = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ FALSE);
+ } /* end if ( aux_ptr != NULL ) */
+#endif /* H5_HAVE_PARALLEL */
+
+ status = H5C_flush_cache(f,
+ dxpl_id,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
flags);
if ( status < 0 ) {
@@ -616,8 +1234,16 @@ done:
* moving management of the dirty flag on cache entries into
* the cache code.
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHSD5 cases.
+ *
*-------------------------------------------------------------------------
*/
+
herr_t
H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned int flags)
{
@@ -625,6 +1251,9 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
H5AC_info_t *info;
H5AC_t *cache;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_set, FAIL)
@@ -717,6 +1346,23 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) {
+
+ result = H5AC_log_inserted_entry(f,
+ f->shared->cache,
+ (H5AC_info_t *)thing,
+ type,
+ addr);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \
+ "H5AC_log_inserted_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_insert_entry(f,
dxpl_id,
H5AC_noblock_dxpl_id,
@@ -731,6 +1377,22 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *
HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C_insert_entry() failed")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -766,6 +1428,18 @@ done:
* in H5C.c, and then re-wrote the function as a wrapper for
* H5C_rename_entry().
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHSD5 cases.
+ *
+ * Note that this code presumes that the renamed entry will
+ * be present in all caches -- which it must be at present.
+ * To maintain this invarient, only rename entries immediately
+ * after you unprotect them.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -773,6 +1447,9 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
{
herr_t result;
herr_t ret_value=SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_rename, FAIL)
@@ -808,6 +1485,13 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
* In any case, don't check this code in without revisiting this
* issue.
* JRM -- 6/6/05
+ *
+ * On reflection, the code was already broken, as there was no
+ * way to advise the SAP that a renamed entry had changed its
+ * address, or was dirty. I will not worry about it for now,
+ * but the matter must be addressed if we ever get serious
+ * about FPHDF5.
+ * JRM -- 7/5/05
*/
HGOTO_DONE(SUCCEED);
@@ -816,6 +1500,21 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) {
+
+ result = H5AC_log_renamed_entry(f->shared->cache,
+ old_addr,
+ new_addr);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_renamed_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_rename_entry(f->shared->cache,
type,
old_addr,
@@ -827,6 +1526,22 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad
"H5C_rename_entry() failed.")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -837,7 +1552,7 @@ done:
/*-------------------------------------------------------------------------
* Function: H5AC_protect
*
- * Purpose: If the target entry is not in the cache, load it. If
+ * Purpose: If the target entry is not in the cache, load it. If
* necessary, attempt to evict one or more entries to keep
* the cache within its maximum size.
*
@@ -845,8 +1560,8 @@ done:
* to the caller. The caller must call H5AC_unprotect() when
* finished with the entry.
*
- * While it is protected, the entry may not be either evicted
- * or flushed -- nor may it be accessed by another call to
+ * While it is protected, the entry may not be either evicted
+ * or flushed -- nor may it be accessed by another call to
* H5AC_protect. Any attempt to do so will result in a failure.
*
* This comment is a re-write of the original Purpose: section.
@@ -887,8 +1602,8 @@ done:
* Purpose section above.
*
* JRM - 6/7/04
- * Abstracted the guts of the function to H5C_protect()
- * in H5C.c, and then re-wrote the function as a wrapper for
+ * Abstracted the guts of the function to H5C_protect()
+ * in H5C.c, and then re-wrote the function as a wrapper for
* H5C_protect().
*
*-------------------------------------------------------------------------
@@ -1108,6 +1823,18 @@ done:
* part of a collection of changes directed at moving
* management of cache entry dirty flags into the H5C code.
*
+ * JRM - 7/5/05
+ * Added code to track dirty byte generation, and to trigger
+ * clean entry list propagation when it exceeds a user
+ * specified threshold. Note that this code only applies in
+ * the PHDF5 case. It should have no effect on either the
+ * serial or FPHSD5 cases.
+ *
+ * JRM - 9/8/05
+ * Added code to track entry size changes. This is necessary
+ * as it can effect dirty byte creation counts, thereby
+ * throwing the caches out of sync in the PHDF5 case.
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -1116,6 +1843,12 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
{
herr_t result;
herr_t ret_value=SUCCEED; /* Return value */
+ hbool_t size_changed = FALSE;
+ hbool_t dirtied;
+ size_t new_size = 0;
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOAPI(H5AC_unprotect, FAIL)
@@ -1129,6 +1862,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
HDassert( ((H5AC_info_t *)thing)->addr == addr );
HDassert( ((H5AC_info_t *)thing)->type == type );
+ dirtied = ((flags & H5AC__DIRTIED_FLAG) == H5AC__DIRTIED_FLAG );
+
+ if ( dirtied ) {
+
+ if ( (type->size)(f, thing, &new_size) < 0 ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \
+ "Can't get size of thing")
+ }
+
+ if ( ((H5AC_info_t *)thing)->size != new_size ) {
+
+ size_changed = TRUE;
+ flags = flags | H5AC__SIZE_CHANGED_FLAG;
+ }
+ }
+
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_FPHDF5
/* The following code to support flexible parallel is a direct copy
@@ -1202,6 +1952,40 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
#endif /* H5_HAVE_FPHDF5 */
#endif /* H5_HAVE_PARALLEL */
+#ifdef H5_HAVE_PARALLEL
+ if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) &&
+ ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) {
+
+ result = H5AC_log_dirtied_entry(f->shared->cache,
+ (H5AC_info_t *)thing,
+ addr,
+ size_changed,
+ new_size);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_dirtied_entry() failed.")
+ }
+ }
+
+ if ( ( (flags & H5C__DELETED_FLAG) != 0 ) &&
+ ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) &&
+ ( aux_ptr->mpi_rank == 0 ) ) {
+
+ result = H5AC_log_deleted_entry(f->shared->cache,
+ (H5AC_info_t *)thing,
+ addr,
+ flags);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "H5AC_log_deleted_entry() failed.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
result = H5C_unprotect(f,
dxpl_id,
H5AC_noblock_dxpl_id,
@@ -1209,7 +1993,8 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
type,
addr,
thing,
- flags);
+ flags,
+ new_size);
if ( result < 0 ) {
@@ -1217,6 +2002,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
"H5C_unprotect() failed.")
}
+#ifdef H5_HAVE_PARALLEL
+ if ( ( aux_ptr != NULL ) &&
+ ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) {
+
+ result = H5AC_propagate_flushed_and_still_clean_entries_list(f,
+ H5AC_noblock_dxpl_id,
+ f->shared->cache,
+ TRUE);
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \
+ "Can't propagate clean entries list.")
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -1686,6 +2488,184 @@ done:
/*-------------------------------------------------------------------------
*
+ * Function: H5AC_broadcast_clean_list()
+ *
+ * Purpose: Broadcast the contents of the process 0 cleaned entry
+ * slist. In passing, also remove all entries from said
+ * list, and also remove any matching entries from the dirtied
+ * slist.
+ *
+ * This function must only be called by the process with
+ * MPI_rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/1/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_broadcast_clean_list(H5AC_t * cache_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ haddr_t addr;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5SL_node_t * slist_node_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+ MPI_Offset * buf_ptr = NULL;
+ size_t buf_size;
+ int i = 0;
+ int mpi_result;
+ int num_entries;
+
+ FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+ HDassert( H5SL_count(aux_ptr->c_slist_ptr) ==
+ (size_t)(aux_ptr->c_slist_len) );
+
+
+ /* First broadcast the number of entries in the list so that the
+ * receives can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ num_entries = aux_ptr->c_slist_len;
+
+ mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
+
+ }
+
+ if ( num_entries > 0 )
+ {
+ /* allocate a buffer to store the list of entry base addresses in */
+
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+
+ buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size);
+
+ if ( buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for clean entry buffer")
+ }
+
+ /* now load the entry base addresses into the buffer, emptying the
+ * cleaned entry list in passing
+ */
+
+ while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) )
+ {
+ slist_entry_ptr = H5SL_item(slist_node_ptr);
+
+ HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+
+ HDassert( i < num_entries );
+
+ addr = slist_entry_ptr->addr;
+
+ if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) {
+
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
+ "can't convert from haddr to MPI off")
+ }
+
+ i++;
+
+ /* now remove the entry from the cleaned entry list */
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+
+ /* and also remove the matching entry from the dirtied list
+ * if it exists.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
+ }
+
+ } /* while */
+
+
+ /* Now broadcast the list of cleaned entries -- if there is one.
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+
+ mpi_result = MPI_Bcast((void *)buf_ptr, (int)buf_size, MPI_BYTE, 0,
+ aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
+ }
+ }
+
+done:
+
+ if ( buf_ptr != NULL ) {
+
+ buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_broadcast_clean_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5AC_check_if_write_permitted
*
* Purpose: Determine if a write is permitted under the current
@@ -1703,6 +2683,16 @@ done:
*
* Modifications:
*
+ * John Mainzer, 9/23/05
+ * Rewrote function to return the value of the
+ * write_permitted field in aux structure if the structure
+ * exists and mpi_rank is 0.
+ *
+ * If the aux structure exists, but mpi_rank isn't 0, the
+ * function now returns FALSE.
+ *
+ * In all other cases, the function returns TRUE.
+ *
*-------------------------------------------------------------------------
*/
@@ -1720,49 +2710,1087 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f,
{
hbool_t write_permitted = TRUE;
herr_t ret_value = SUCCEED; /* Return value */
+#ifdef H5_HAVE_PARALLEL
+ H5AC_aux_t * aux_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
+
FUNC_ENTER_NOAPI(H5AC_check_if_write_permitted, FAIL)
#ifdef H5_HAVE_PARALLEL
+ HDassert( f != NULL );
+ HDassert( f->shared != NULL );
+ HDassert( f->shared->cache != NULL );
- if ( IS_H5FD_MPI(f) ) {
+ aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr);
+
+ if ( aux_ptr != NULL ) {
+
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
- H5P_genplist_t *dxpl; /* Dataset transfer property list */
- H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode property value */
+ if ( aux_ptr->mpi_rank == 0 ) {
- /* Get the dataset transfer property list */
- if ( NULL == (dxpl = H5I_object(dxpl_id)) ) {
+ write_permitted = aux_ptr->write_permitted;
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \
- "not a dataset creation property list")
+ } else {
+
+ write_permitted = FALSE;
+ }
+ }
+#endif /* H5_HAVE_PARALLEL */
+
+ *write_permitted_ptr = write_permitted;
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_check_if_write_permitted() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_deleted_entry()
+ *
+ * Purpose: Log an entry for which H5C__DELETED_FLAG has been set.
+ *
+ * If mpi_rank is 0, we must make sure that the entry doesn't
+ * appear in the cleaned or dirty entry lists. Otherwise,
+ * we have nothing to do.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_deleted_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ unsigned int flags)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+
+ HDassert( (flags & H5C__DELETED_FLAG) != 0 );
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ /* if the entry appears in the dirtied entry slist, remove it. */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
}
- /* Get the transfer mode property */
- if( H5P_get(dxpl, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0 ) {
+ /* if the entry appears in the cleaned entry slist, remove it. */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, \
- "can't retrieve xfer mode")
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
}
+ }
- if ( xfer_mode == H5FD_MPIO_INDEPENDENT ) {
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
- write_permitted = FALSE;
+} /* H5AC_log_deleted_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_dirtied_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly dirtied entry.
+ *
+ * If mpi_rank isnt 0, this simply means adding the size
+ * of the entries to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must first check to see if the entry
+ * appears in the dirty entries slist. If it is, do nothing.
+ * If it isn't, add the size to th dirty_bytes count, add the
+ * entry to the dirty entries slist, and remove it from the
+ * cleaned list (if it is present there).
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_dirtied_entry(H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ haddr_t addr,
+ hbool_t size_changed,
+ size_t new_size)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ size_t entry_size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_dirtied_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+ HDassert( entry_ptr->is_dirty == FALSE );
+
+ if ( size_changed ) {
+
+ entry_size = new_size;
+
+ } else {
+
+ entry_size = entry_ptr->size;
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate dirty slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ }
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) {
+
+ /* the entry is dirty. If it exists on the cleaned entries list,
+ * remove it.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from clean entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+ }
+ } else {
+
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->unprotect_dirty_bytes += entry_size;
+ aux_ptr->unprotect_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_dirtied_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_flushed_entry()
+ *
+ * Purpose: Update the clean entry slist for the flush of an entry --
+ * specifically, if the entry has been cleared, remove it
+ * from both the cleaned and dirtied lists if it is present.
+ * Otherwise, if the entry was dirty, insert the indicated
+ * entry address in the clean slist if it isn't there already.
+ *
+ * This function is only used in PHDF5, and should only
+ * be called for the process with mpi rank 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/29/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+#if 0 /* This is useful debugging code. -- JRM */
+static herr_t
+H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ int type_id)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL)
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) {
+
+ HDfprintf(stdout,
+ "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n",
+ (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id);
+ }
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_flushed_entry_dummy() */
+#endif /* JRM */
+
+static herr_t
+H5AC_log_flushed_entry(H5C_t * cache_ptr,
+ haddr_t addr,
+ hbool_t was_dirty,
+ unsigned flags,
+ UNUSED int type_id)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t cleared;
+ H5AC_aux_t * aux_ptr;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+
+ FUNC_ENTER_NOAPI(H5AC_log_flushed_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank == 0 );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ cleared = ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0 );
+
+ if ( cleared ) {
+
+ /* If the entry has been cleared, must remove it from both the
+ * cleaned list and the dirtied list.
+ */
+
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from clean entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC);
+ HDassert( slist_entry_ptr->addr == addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
+ }
+ } else if ( was_dirty ) {
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the clean entry list. */
+
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate clean slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into clean entry slist.")
+ }
+
+ aux_ptr->c_slist_len += 1;
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_flushed_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_inserted_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a newly inserted entry.
+ *
+ * If mpi_rank isnt 0, this simply means adding the size
+ * of the entry to the dirty_bytes count.
+ *
+ * If mpi_rank is 0, we must also add the entry to the
+ * dirty entries slist.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_inserted_entry(H5F_t * f,
+ H5AC_t * cache_ptr,
+ H5AC_info_t * entry_ptr,
+ const H5AC_class_t * type,
+ haddr_t addr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ size_t size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ HDassert( entry_ptr != NULL );
+ HDassert( entry_ptr->addr == addr );
+ HDassert( entry_ptr->type == type );
+
+ /* the size field of the entry will not have been set yet, so we
+ * have to obtain it directly.
+ */
+ if ( (type->size)(f, (void *)entry_ptr, &size) < 0 ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \
+ "Can't get size of entry to be inserted.")
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) {
+
+ /* insert the address of the entry in the dirty entry list, and
+ * add its size to the dirty_bytes count.
+ */
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate dirty slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = addr;
+
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+
+ } else {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Inserted entry already in dirty slist.")
+ }
+
+ if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Inserted entry in clean slist.")
+ }
+ }
+
+ aux_ptr->dirty_bytes += size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->insert_dirty_bytes += size;
+ aux_ptr->insert_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_inserted_entry() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_log_renamed_entry()
+ *
+ * Purpose: Update the dirty_bytes count for a renamed entry.
+ *
+ * WARNING
+ *
+ * At present, the way that the rename call is used ensures
+ * that the renamed entry is present in all caches by
+ * renaming in a collective operation and immediately after
+ * unprotecting the target entry.
+ *
+ * This function uses this invarient, and will cause arcane
+ * failures if it is not met. If maintaining this invarient
+ * becomes impossible, we will have to rework this function
+ * extensively, and likely include a bit of IPC for
+ * synchronization. A better option might be to subsume
+ * rename in the unprotect operation.
+ *
+ * Given that the target entry is in all caches, the function
+ * proceeds as follows:
+ *
+ * For processes with mpi rank other 0, it simply checks to
+ * see if the entry was dirty prior to the rename, and adds
+ * the entries size to the dirty bytes count.
+ *
+ * In the process with mpi rank 0, the function first checks
+ * to see if the entry was dirty prior to the rename. If it
+ * was, and if the entry doesn't appear in the dirtied list
+ * under its old address, it adds the entry's size to the
+ * dirty bytes count.
+ *
+ * The rank 0 process then removes any references to the
+ * entry under its old address from the cleands and dirtied
+ * lists, and inserts an entry in the dirtied list under the
+ * new address.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 6/30/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_log_renamed_entry(H5AC_t * cache_ptr,
+ haddr_t old_addr,
+ haddr_t new_addr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t entry_in_cache;
+ hbool_t entry_dirty;
+ size_t entry_size;
+ H5AC_aux_t * aux_ptr = NULL;
+ H5AC_slist_entry_t * slist_entry_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_log_renamed_entry, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+ /* get entry status, size, etc here */
+ if ( H5C_get_entry_status(cache_ptr, old_addr, &entry_size, &entry_in_cache,
+ &entry_dirty, NULL) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.")
+
+ } else if ( ! entry_in_cache ) {
+
+ HDassert( entry_in_cache );
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.")
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ HDassert( aux_ptr->d_slist_ptr != NULL );
+ HDassert( aux_ptr->c_slist_ptr != NULL );
+
+ /* if the entry appears in the cleaned entry slist, under its old
+ * address, remove it.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr,
+ (void *)(&old_addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == old_addr );
+
+ if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from cleaned entry slist.")
+ }
+
+ slist_entry_ptr->magic = 0;
+ H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr);
+ slist_entry_ptr = NULL;
+
+ aux_ptr->c_slist_len -= 1;
+
+ HDassert( aux_ptr->c_slist_len >= 0 );
+ }
+
+ /* if the entry appears in the dirtied entry slist under its old
+ * address, remove it, but don't free it. Set addr to new_addr.
+ */
+ if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr,
+ (void *)(&old_addr))) != NULL ) {
+
+ HDassert( slist_entry_ptr->magic ==
+ H5AC__H5AC_SLIST_ENTRY_T_MAGIC );
+ HDassert( slist_entry_ptr->addr == old_addr );
+
+ if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr))
+ != slist_entry_ptr ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \
+ "Can't delete entry from dirty entry slist.")
+ }
+
+ slist_entry_ptr->addr = new_addr;
+
+ aux_ptr->d_slist_len -= 1;
+
+ HDassert( aux_ptr->d_slist_len >= 0 );
} else {
+
+ /* otherwise, allocate a new entry that is ready
+ * for insertion, and increment dirty_bytes.
+ *
+ * Note that the fact that the entry wasn't in the dirtied
+ * list under its old address implies that it must have
+ * been clean to start with.
+ */
+
+ HDassert( !entry_dirty );
+
+ if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "Can't allocate dirty slist entry .")
+ }
+
+ slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC;
+ slist_entry_ptr->addr = new_addr;
- HDassert(xfer_mode == H5FD_MPIO_COLLECTIVE );
+ aux_ptr->dirty_bytes += entry_size;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->rename_dirty_bytes += entry_size;
+ aux_ptr->rename_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
}
+
+ /* verify that there is no entry at new_addr in the dirty slist */
+ if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&new_addr)) != NULL ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "dirty slist already contains entry at new_addr.")
+ }
+
+ /* insert / reinsert the entry in the dirty slist */
+ if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr,
+ &(slist_entry_ptr->addr)) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \
+ "can't insert entry into dirty entry slist.")
+ }
+
+ aux_ptr->d_slist_len += 1;
+
+ } else if ( ! entry_dirty ) {
+
+ aux_ptr->dirty_bytes += entry_size;
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->rename_dirty_bytes += entry_size;
+ aux_ptr->rename_dirty_bytes_updates += 1;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
}
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_log_renamed_entry() */
#endif /* H5_HAVE_PARALLEL */
- *write_permitted_ptr = write_permitted;
+
+/*-------------------------------------------------------------------------
+ * Function: H5AC_propagate_flushed_and_still_clean_entries_list
+ *
+ * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed
+ * to write to file. All other metadata caches on processes
+ * with rank greater than 0 must retain dirty entries until
+ * they are notified that the entry is now clean.
+ *
+ * This function is the main routine for that proceedure.
+ * It must be called simultaniously on all processes that
+ * have the relevant file open. To this end, there must
+ * be a barrier immediately prior to this call.
+ *
+ * Typicaly, this will be done one of two ways:
+ *
+ * 1) Dirty byte creation exceeds some user specified value.
+ *
+ * While metadata reads may occur independently, all
+ * operations writing metadata must be collective. Thus
+ * all metadata caches see the same sequence of operations,
+ * and therefore the same dirty data creation.
+ *
+ * This fact is used to synchronize the caches for purposes
+ * of propagating the list of flushed and still clean
+ * entries, by simply calling this function from all
+ * caches whenever some user specified threshold on dirty
+ * data is exceeded.
+ *
+ * 2) Under direct user control -- this operation must be
+ * collective.
+ *
+ * The operations to be managed by this function are as
+ * follows:
+ *
+ * For the process with mpi rank 0:
+ *
+ * 1) Enable writes, flush the cache to its min clean size,
+ * and then disable writes again.
+ *
+ * 2) Load the contents of the flushed and still clean entries
+ * list (c_slist_ptr) into a buffer, and broadcast that
+ * buffer to all the other caches.
+ *
+ * 3) Clear the flushed and still clean entries list
+ * (c_slist_ptr).
+ *
+ *
+ * For all processes with mpi rank greater than 0:
+ *
+ * 1) Receive the flushed and still clean entries list broadcast
+ *
+ * 2) Mark the specified entries as clean.
+ *
+ *
+ * For all processes:
+ *
+ * 1) Reset the dirtied bytes count to 0.
+ *
+ * Return: Success: non-negative
+ *
+ * Failure: negative
+ *
+ * Programmer: John Mainzer
+ * July 5, 2005
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+herr_t
+H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f,
+ hid_t dxpl_id,
+ H5AC_t * cache_ptr,
+ hbool_t do_barrier)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ herr_t result;
+ int mpi_code;
+ H5AC_aux_t * aux_ptr = NULL;
+
+ FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ HDfprintf(stdout,
+ "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n",
+ (int)(aux_ptr->mpi_rank),
+ (int)(aux_ptr->dirty_bytes_propagations),
+ (int)(aux_ptr->unprotect_dirty_bytes),
+ (int)(aux_ptr->unprotect_dirty_bytes_updates),
+ (int)(aux_ptr->insert_dirty_bytes),
+ (int)(aux_ptr->insert_dirty_bytes_updates),
+ (int)(aux_ptr->rename_dirty_bytes),
+ (int)(aux_ptr->rename_dirty_bytes_updates));
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+
+ if ( do_barrier ) {
+
+ /* to prevent "messages from the future" we must synchronize all
+ * processes before we start the flush. This synchronization may
+ * already be done -- hence the do_barrier parameter.
+ */
+
+ if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
+ }
+ }
+
+ if ( aux_ptr->mpi_rank == 0 ) {
+
+ aux_ptr->write_permitted = TRUE;
+
+ result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id,
+ cache_ptr);
+
+ aux_ptr->write_permitted = FALSE;
+
+ if ( result < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5C_flush_to_min_clean() failed.")
+ }
+
+ if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't broadcast clean slist.")
+ }
+
+ HDassert( aux_ptr->c_slist_len == 0 );
+
+ } else {
+
+ if ( H5AC_receive_and_apply_clean_list(f, dxpl_id,
+ H5AC_noblock_dxpl_id,
+ cache_ptr) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't receive and/or process clean slist broadcast.")
+ }
+ }
+
+ aux_ptr->dirty_bytes = 0;
+#if H5AC_DEBUG_DIRTY_BYTES_CREATION
+ aux_ptr->dirty_bytes_propagations += 1;
+ aux_ptr->unprotect_dirty_bytes = 0;
+ aux_ptr->unprotect_dirty_bytes_updates = 0;
+ aux_ptr->insert_dirty_bytes = 0;
+ aux_ptr->insert_dirty_bytes_updates = 0;
+ aux_ptr->rename_dirty_bytes = 0;
+ aux_ptr->rename_dirty_bytes_updates = 0;
+#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
done:
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5AC_check_if_write_permitted() */
+} /* H5AC_propagate_flushed_and_still_clean_entries_list() */
+#endif /* H5_HAVE_PARALLEL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5AC_receive_and_apply_clean_list()
+ *
+ * Purpose: Receive the list of cleaned entries from process 0,
+ * and mark the specified entries as clean.
+ *
+ * This function must only be called by the process with
+ * MPI_rank greater than 0.
+ *
+ * Return SUCCEED on success, and FAIL on failure.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 7/4/05
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifdef H5_HAVE_PARALLEL
+static herr_t
+H5AC_receive_and_apply_clean_list(H5F_t * f,
+ hid_t primary_dxpl_id,
+ hid_t secondary_dxpl_id,
+ H5AC_t * cache_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+ H5AC_aux_t * aux_ptr = NULL;
+ haddr_t * haddr_buf_ptr = NULL;
+ MPI_Offset * MPI_Offset_buf_ptr = NULL;
+ size_t buf_size;
+ int i = 0;
+ int mpi_result;
+ int num_entries;
+
+ FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL)
+
+ HDassert( cache_ptr != NULL );
+ HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
+
+ aux_ptr = cache_ptr->aux_ptr;
+
+ HDassert( aux_ptr != NULL );
+ HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC );
+ HDassert( aux_ptr->mpi_rank != 0 );
+
+ /* First receive the number of entries in the list so that we
+ * can set up a buffer to receive them. If there aren't
+ * any, we are done.
+ */
+ mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result)
+ }
+
+ if ( num_entries > 0 )
+ {
+ /* allocate a buffers to store the list of entry base addresses in */
+
+ buf_size = sizeof(MPI_Offset) * (size_t)num_entries;
+
+ MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size);
+
+ if ( MPI_Offset_buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for receive buffer")
+ }
+
+ haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) *
+ (size_t)num_entries);
+
+ if ( haddr_buf_ptr == NULL ) {
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for haddr buffer")
+ }
+
+
+ /* Now receive the list of cleaned entries
+ *
+ * The peculiar structure of the following call to MPI_Bcast is
+ * due to MPI's (?) failure to believe in the MPI_Offset type.
+ * Thus the element type is MPI_BYTE, with size equal to the
+ * buf_size computed above.
+ */
+
+ mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size,
+ MPI_BYTE, 0, aux_ptr->mpi_comm);
+
+ if ( mpi_result != MPI_SUCCESS ) {
+
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result)
+ }
+
+
+ /* translate the MPI_Offsets to haddr_t */
+ i = 0;
+ while ( i < num_entries )
+ {
+ haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]);
+
+ if ( haddr_buf_ptr[i] == HADDR_UNDEF ) {
+
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \
+ "can't convert MPI off to haddr")
+ }
+
+ i++;
+ }
+
+
+ /* mark the indicated entries as clean */
+ if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id,
+ cache_ptr, (int32_t)num_entries,
+ &(haddr_buf_ptr[0])) < 0 ) {
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "Can't mark entries clean.")
+
+ }
+ }
+
+done:
+
+ if ( MPI_Offset_buf_ptr != NULL ) {
+
+ MPI_Offset_buf_ptr =
+ (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr);
+ }
+
+ if ( haddr_buf_ptr != NULL ) {
+
+ haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr);
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5AC_receive_and_apply_clean_list() */
+#endif /* H5_HAVE_PARALLEL */
+