From c100b0bf2639c03579ce1b2c4013b36c6f40350b Mon Sep 17 00:00:00 2001 From: John Mainzer Date: Tue, 27 Sep 2005 00:20:11 -0500 Subject: [svn-r11470] Purpose: Repair synchronization bug in the metadata cache in PHDF5 Also repair numerous other bugs that surfaced in testing the bug fix. Description: While operations modifying metadata must be collective, we allow independant reads. This allows metadata caches on different processes to adjust to different sizes, and to place the entries on their dirty lists in different orders. Since only process 0 actually writes metadata to disk (all other processes thought they did, but the writes were discarded on the theory that they had to be collective), this made it possible for another process to modify metadata, flush it, and then read it back in in its original form (pre-modification) form. The possibilities for file corruption should be obvious. Solution: Make the policy that only process 0 can write to file explicit, and visible to the metadata caches. Thus only process 0 may flush dirty entries -- all other caches must retain dirty entries until they are informed by process 0 that the entries are clean. Synchronization is handled by counting the bytes of dirty cache entries created, and then synching up between the caches whenever the sum exceeds an (eventually user specified) limit. Dirty metadata creation is consistent across all processes because all operations modifying metadata must be collective. This change uncovered may bugs which are repaired in this checkin. It also required modification of H5HL and H5O to allocate file space on insertion rather than on flush from cache. Platforms tested: H5committest, heping(parallel & serial) Misc. update: --- examples/h5_attribute.c | 4 +- src/H5AC.c | 2114 ++++++++++++++++++++++++++++++++++++++++++++++- src/H5ACprivate.h | 4 +- src/H5C.c | 1053 ++++++++++++++++++++++- src/H5Cpkg.h | 39 +- src/H5Cprivate.h | 71 +- src/H5D.c | 2 +- src/H5F.c | 62 +- src/H5FD.c | 2 +- src/H5FDmpio.c | 38 +- src/H5FDmpiposix.c | 34 +- src/H5FDmulti.c | 9 +- src/H5Fprivate.h | 1 + src/H5HL.c | 189 ++++- src/H5HLpkg.h | 1 - src/H5MF.c | 8 +- src/H5MFprivate.h | 2 +- src/H5O.c | 746 +++++++++++------ src/H5Oprivate.h | 6 +- test/cache.c | 19 +- testpar/t_mdset.c | 6 +- 21 files changed, 3996 insertions(+), 414 deletions(-) diff --git a/examples/h5_attribute.c b/examples/h5_attribute.c index 6857236..96fdd63 100644 --- a/examples/h5_attribute.c +++ b/examples/h5_attribute.c @@ -188,11 +188,11 @@ main (void) /* * Attach to the string attribute using its index, then read and display the value. */ - attr = H5Aopen_idx(dataset, 2); + attr = H5Aopen_idx(dataset, 1); atype = H5Tcopy(H5T_C_S1); H5Tset_size(atype, 5); ret = H5Aread(attr, atype, string_out); - printf("The value of the attribute with index 2 is %s \n", string_out); + printf("The value of the attribute with index 1 is %s \n", string_out); ret = H5Aclose(attr); ret = H5Tclose(atype); diff --git a/src/H5AC.c b/src/H5AC.c index d4b70d2..eff9137 100644 --- a/src/H5AC.c +++ b/src/H5AC.c @@ -42,18 +42,24 @@ *------------------------------------------------------------------------- */ +#define H5C_PACKAGE /*suppress error about including H5Cpkg */ #define H5F_PACKAGE /*suppress error about including H5Fpkg */ /* Interface initialization */ #define H5_INTERFACE_INIT_FUNC H5AC_init_interface +#ifdef H5_HAVE_PARALLEL +#include +#endif /* H5_HAVE_PARALLEL */ #include "H5private.h" /* Generic Functions */ #include "H5ACprivate.h" /* Metadata cache */ +#include "H5Cpkg.h" /* Cache */ #include "H5Dprivate.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5FDprivate.h" /* File drivers */ +#include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ #include "H5Pprivate.h" /* Property lists */ @@ -62,6 +68,299 @@ #include "H5FPprivate.h" /* Flexible PHDF5 */ #endif /* H5_HAVE_FPHDF5 */ +#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 + +/**************************************************************************** + * + * structure H5AC_aux_t + * + * While H5AC has become a wrapper for the cache implemented in H5C.c, there + * are some features of the metadata cache that are specific to it, and which + * therefore do not belong in the more generic H5C cache code. + * + * In particular, there is the matter of synchronizing writes from the + * metadata cache to disk in the PHDF5 case. + * + * Prior to this update, the presumption was that all metadata caches would + * write the same data at the same time since all operations modifying + * metadata must be performed collectively. Given this assumption, it was + * safe to allow only the writes from process 0 to actually make it to disk, + * while metadata writes from all other processes were discarded. + * + * Unfortunately, this presumption is in error as operations that read + * metadata need not be collective, but can change the location of dirty + * entries in the metadata cache LRU lists. This can result in the same + * metadata write operation triggering writes from the metadata caches on + * some processes, but not all (causing a hang), or in different sets of + * entries being written from different caches (potentially resulting in + * metadata corruption in the file). + * + * To deal with this issue, I decided to apply a paradigm shift to the way + * metadata is written to disk. + * + * With this set of changes, only the metadata cache on process 0 is able + * to write metadata to disk, although metadata caches on all other + * processes can read metadata from disk as before. + * + * To keep all the other caches from getting plugged up with dirty metadata, + * process 0 periodically broadcasts a list of entries that it has flushed + * since that last notice, and which are currently clean. The other caches + * mark these entries as clean as well, which allows them to evict the + * entries as needed. + * + * One obvious problem in this approach is synchronizing the broadcasts + * and receptions, as different caches may see different amounts of + * activity. + * + * The current solution is for the caches to track the number of bytes + * of newly generated dirty metadata, and to broadcast and receive + * whenever this value exceeds some user specified threshold. + * + * Maintaining this count is easy for all processes not on process 0 -- + * all that is necessary is to add the size of the entry to the total + * whenever there is an insertion, a rename of a previously clean entry, + * or whever a previously clean entry is marked dirty in an unprotect. + * + * On process 0, we have to be careful not to count dirty bytes twice. + * If an entry is marked dirty, flushed, and marked dirty again, all + * within a single reporting period, it only th first marking should + * be added to the dirty bytes generated tally, as that is all that + * the other processes will see. + * + * At present, this structure exists to maintain the fields needed to + * implement the above scheme, and thus is only used in the parallel + * case. However, other uses may arise in the future. + * + * Instance of this structure are associated with metadata caches via + * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is + * responsible for allocating, maintaining, and discarding instances + * of H5AC_aux_t. + * + * The remainder of this header comments documents the individual fields + * of the structure. + * + * JRM - 6/27/05 + * + * magic: Unsigned 32 bit integer always set to + * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate + * pointers to instances of H5AC_aux_t. + * + * mpi_comm: MPI communicator associated with the file for which the + * cache has been created. + * + * mpi_rank: MPI rank of this process within mpi_comm. + * + * mpi_size: Number of processes in mpi_comm. + * + * write_permitted: Boolean flag used to control whether the cache + * is permitted to write to file. + * + * dirty_bytes_threshold: Integer field containing the dirty bytes + * generation threashold. Whenever dirty byte creation + * exceeds this value, the metadata cache on process 0 + * broadcasts a list of the entries it has flushed since + * the last broadcast (or since the beginning of execution) + * and which are currently clean (if they are still in the + * cache) + * + * Similarly, metadata caches on processes other than process + * 0 will attempt to receive a list of clean entries whenever + * the threshold is exceeded. + * + * dirty_bytes: Integer field containing the number of bytes of dirty + * metadata generated since the beginning of the computation, + * or (more typically) since the last clean entries list + * broadcast. This field is reset to zero after each such + * broadcast. + * + * dirty_bytes_propagations: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times the cleaned list + * has been propagated from process 0 to the other + * processes. + * + * unprotect_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via unprotect operations since the last time the cleaned + * list was propagated. + * + * unprotect_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via unprotect operations since the last time + * the cleaned list was propagated. + * + * insert_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via insert operations since the last time the cleaned + * list was propagated. + * + * insert_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via insert operations since the last time + * the cleaned list was propagated. + * + * rename_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via rename operations since the last time the cleaned + * list was propagated. + * + * rename_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via rename operations since the last time + * the cleaned list was propagated. + * + * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list + * of entries that have been dirtied since the last time they + * were listed in a clean entries broadcast. This list is + * only maintained by the metadata cache on process 0 -- it + * it used to maintain a view of the dirty entries as seen + * by the other caches, so as to keep the dirty bytes count + * in synchronization with them. + * + * Thus on process 0, the dirty_bytes count is incremented + * only if either + * + * 1) an entry is inserted in the metadata cache, or + * + * 2) a previously clean entry is renamed, and it does not + * already appear in the dirty entry list, or + * + * 3) a previously clean entry is unprotected with the + * dirtied flag set and the entry does not already appear + * in the dirty entry list. + * + * Entries are added to the dirty entry list whever they cause + * the dirty bytes count to be increased. They are removed + * when they appear in a clean entries broadcast. Note that + * renames must be reflected in the dirty entry list. + * + * To reitterate, this field is only used on process 0 -- it + * should be NULL on all other processes. + * + * d_slist_len: Integer field containing the number of entries in the + * dirty entry list. This field should always contain the + * value 0 on all processes other than process 0. It exists + * primarily for sanity checking. + * + * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list + * of entries that were dirty, have been flushed + * to disk since the last clean entries broadcast, and are + * still clean. Since only process 0 can write to disk, this + * list only exists on process 0. + * + * In essence, this slist is used to assemble the contents of + * the next clean entries broadcast. The list emptied after + * each broadcast. + * + * c_slist_len: Integer field containing the number of entries in the clean + * entries list (*c_slist_ptr). This field should always + * contain the value 0 on all processes other than process 0. + * It exists primarily for sanity checking. + * + ****************************************************************************/ + +#ifdef H5_HAVE_PARALLEL + +#define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01 + +typedef struct H5AC_aux_t +{ + uint32_t magic; + + MPI_Comm mpi_comm; + + int mpi_rank; + + int mpi_size; + + hbool_t write_permitted; + + int32_t dirty_bytes_threshold; + + int32_t dirty_bytes; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + + int32_t dirty_bytes_propagations; + + int32_t unprotect_dirty_bytes; + int32_t unprotect_dirty_bytes_updates; + + int32_t insert_dirty_bytes; + int32_t insert_dirty_bytes_updates; + + int32_t rename_dirty_bytes; + int32_t rename_dirty_bytes_updates; + +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + H5SL_t * d_slist_ptr; + + int32_t d_slist_len; + + H5SL_t * c_slist_ptr; + + int32_t c_slist_len; + +} H5AC_aux_t; /* struct H5AC_aux_t */ + +/* Declare a free list to manage the H5AC_aux_t struct */ +H5FL_DEFINE_STATIC(H5AC_aux_t); + +#endif /* H5_HAVE_PARALLEL */ + +/**************************************************************************** + * + * structure H5AC_slist_entry_t + * + * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t + * and the cleaned entry list maintained via the c_slist_ptr field of + * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned + * entries. Unfortunately, the slist code makes us define a dynamically + * allocated structure to store these offsets in. This structure serves + * that purpose. Its fields are as follows: + * + * magic: Unsigned 32 bit integer always set to + * H5AC__H5AC_SLIST_ENTRY_T_MAGIC. This field is used to + * validate pointers to instances of H5AC_slist_entry_t. + * + * addr: file offset of a metadata entry. Entries are added to this + * list (if they aren't there already) when they are marked + * dirty in an unprotect, inserted, or renamed. They are + * removed when they appear in a clean entries broadcast. + * + ****************************************************************************/ + +#ifdef H5_HAVE_PARALLEL + +#define H5AC__H5AC_SLIST_ENTRY_T_MAGIC 0x00D0A02 + +typedef struct H5AC_slist_entry_t +{ + uint32_t magic; + + haddr_t addr; +} H5AC_slist_entry_t; + +/* Declare a free list to manage the H5AC_slist_entry_t struct */ +H5FL_DEFINE_STATIC(H5AC_slist_entry_t); + +#endif /* H5_HAVE_PARALLEL */ + + /* * Private file-scope variables. */ @@ -90,6 +389,55 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f, hid_t dxpl_id, hbool_t * write_permitted_ptr); +#ifdef H5_HAVE_PARALLEL +static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr); + +static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + unsigned int flags); + +static herr_t H5AC_log_dirtied_entry(H5AC_t * cache_ptr, + H5C_cache_entry_t * entry_ptr, + haddr_t addr, + hbool_t size_changed, + size_t new_size); + +static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); + +#if 0 /* this is useful debugging code -- JRM */ +static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); +#endif /* JRM */ + +static herr_t H5AC_log_inserted_entry(H5F_t * f, + H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + const H5AC_class_t * type, + haddr_t addr); + +static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr, + hbool_t do_barrier); + +static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5AC_t * cache_ptr); + +static herr_t H5AC_log_renamed_entry(H5AC_t * cache_ptr, + haddr_t old_addr, + haddr_t new_addr); +#endif /* H5_HAVE_PARALLEL */ + /*------------------------------------------------------------------------- * Function: H5AC_init @@ -351,6 +699,13 @@ H5AC_term_interface(void) * through the function. * JRM - 4/7/05 * + * Added code allocating and initializing the auxilary + * structure (an instance of H5AC_aux_t), and linking it + * to the instance of H5C_t created by H5C_create(). At + * present, the auxilary structure is only used in PHDF5. + * + * JRM - 6/28/05 + * *------------------------------------------------------------------------- */ @@ -376,6 +731,12 @@ H5AC_create(const H5F_t *f, { herr_t ret_value = SUCCEED; /* Return value */ herr_t result; +#ifdef H5_HAVE_PARALLEL + MPI_Comm mpi_comm = MPI_COMM_NULL; + int mpi_rank = -1; + int mpi_size = -1; + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_create, FAIL) @@ -390,15 +751,138 @@ H5AC_create(const H5F_t *f, HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration"); } - /* The default max cache size and min clean size will frequently be - * overwritten shortly by the subsequent set resize config call. - * -- JRM - */ - f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, - H5AC__DEFAULT_MIN_CLEAN_SIZE, - (H5AC_NTYPES - 1), - (const char **)H5AC_entry_type_names, - H5AC_check_if_write_permitted); +#ifdef H5_HAVE_PARALLEL + if ( IS_H5FD_MPI(f) ) { + + if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \ + "can't get MPI communicator") + } + + if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank") + } + + if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size") + } + + /* There is no point in setting up the auxilary structure if size + * is less than or equal to 1, as there will never be any processes + * to broadcast the clean lists to. + */ + if ( mpi_size > 1 ) { + + if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate H5AC auxilary structure.") + + } else { + + aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC; + aux_ptr->mpi_comm = mpi_comm; + aux_ptr->mpi_rank = mpi_rank; + aux_ptr->mpi_size = mpi_size; + aux_ptr->write_permitted = FALSE; + aux_ptr->dirty_bytes_threshold = 256 * 1024; + aux_ptr->dirty_bytes = 0; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations = 0; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + aux_ptr->d_slist_ptr = NULL; + aux_ptr->d_slist_len = 0; + aux_ptr->c_slist_ptr = NULL; + aux_ptr->c_slist_len = 0; + } + + if ( mpi_rank == 0 ) { + + aux_ptr->d_slist_ptr = + H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16); + + if ( aux_ptr->d_slist_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, + "can't create dirtied entry list.") + } + + aux_ptr->c_slist_ptr = + H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16); + + if ( aux_ptr->c_slist_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, + "can't create cleaned entry list.") + } + } + } + + if ( aux_ptr != NULL ) { + + if ( aux_ptr->mpi_rank == 0 ) { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + H5AC_log_flushed_entry, + (void *)aux_ptr); + + } else { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + NULL, + FALSE, +#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */ + H5AC_log_flushed_entry_dummy, +#else /* JRM */ + NULL, +#endif /* JRM */ + (void *)aux_ptr); + } + } else { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + NULL, + NULL); + } + } else { +#endif /* H5_HAVE_PARALLEL */ + /* The default max cache size and min clean size will frequently be + * overwritten shortly by the subsequent set resize config call. + * -- JRM + */ + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + NULL, + NULL); +#ifdef H5_HAVE_PARALLEL + } +#endif /* H5_HAVE_PARALLEL */ if ( NULL == f->shared->cache ) { @@ -416,6 +900,31 @@ H5AC_create(const H5F_t *f, done: +#ifdef H5_HAVE_PARALLEL + + /* if there is a failure, try to tidy up the auxilary structure */ + + if ( ret_value != SUCCEED ) { + + if ( aux_ptr != NULL ) { + + if ( aux_ptr->d_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->d_slist_ptr); + } + + if ( aux_ptr->c_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->c_slist_ptr); + } + + aux_ptr->magic = 0; + H5FL_FREE(H5AC_aux_t, aux_ptr); + aux_ptr = NULL; + } + } +#endif /* H5_HAVE_PARALLEL */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5AC_create() */ @@ -445,6 +954,10 @@ done: * * JRM - 6/7/04 * + * Added code to free the auxiliary structure and its + * associated slist if present. + * JRM - 6/28/05 + * *------------------------------------------------------------------------- */ herr_t @@ -452,12 +965,23 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) { H5AC_t *cache = NULL; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_dest, FAIL) assert(f); assert(f->shared->cache); cache = f->shared->cache; +#ifdef H5_HAVE_PARALLEL + aux_ptr = cache->aux_ptr; + + if ( aux_ptr != NULL ) { + + HDassert ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + } +#endif /* H5_HAVE_PARALLEL */ f->shared->cache = NULL; @@ -466,6 +990,25 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "can't destroy cache") } +#ifdef H5_HAVE_PARALLEL + if ( aux_ptr != NULL ) { + + if ( aux_ptr->d_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->d_slist_ptr); + } + + if ( aux_ptr->c_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->c_slist_ptr); + } + + aux_ptr->magic = 0; + H5FL_FREE(H5AC_aux_t, aux_ptr); + aux_ptr = NULL; + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -529,28 +1072,103 @@ done: * * Complete re-write. See above for details. -- JRM 5/11/04 * - * Abstracted the guts of the function to H5C_dest() in H5C.c, - * and then re-wrote the function as a wrapper for H5C_dest(). + * Abstracted the guts of the function to H5C_flush_cache() + * in H5C.c, and then re-wrote the function as a wrapper for + * H5C_flush_cache(). * * JRM - 6/7/04 * + * JRM - 7/5/05 + * Modified function as part of a fix for a cache coherency + * bug in PHDF5. See the header comments on the H5AC_aux_t + * structure for details. + * *------------------------------------------------------------------------- */ herr_t H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags) { - herr_t status; - herr_t ret_value=SUCCEED; /* Return value */ + herr_t status; + herr_t ret_value = SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; + int mpi_code; +#endif /* H5_HAVE_PARALLEL */ + FUNC_ENTER_NOAPI(H5AC_flush, FAIL) HDassert(f); HDassert(f->shared->cache); - status = H5C_flush_cache(f, - dxpl_id, - H5AC_noblock_dxpl_id, - f->shared->cache, +#ifdef H5_HAVE_PARALLEL + aux_ptr = f->shared->cache->aux_ptr; + + if ( aux_ptr != NULL ) { + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + HDfprintf(stdout, + "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", + (int)(aux_ptr->mpi_rank), + (int)(aux_ptr->unprotect_dirty_bytes), + (int)(aux_ptr->unprotect_dirty_bytes_updates), + (int)(aux_ptr->insert_dirty_bytes), + (int)(aux_ptr->insert_dirty_bytes_updates), + (int)(aux_ptr->rename_dirty_bytes), + (int)(aux_ptr->rename_dirty_bytes_updates)); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + /* to prevent "messages from the future" we must synchronize all + * processes before we start the flush. Hence the following + * barrier. + */ + if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) + } + + /* if the clear only flag is set, this flush will not involve any + * disk I/O. In such cases, it is not necessary to let process 0 + * flush first. + */ + if ( ( aux_ptr->mpi_rank == 0 ) && + ( (flags & H5AC__FLUSH_CLEAR_ONLY_FLAG) != 0 ) ) { + + unsigned init_flush_flags = H5AC__NO_FLAGS_SET; + + if ( ( (flags & H5AC__FLUSH_MARKED_ENTRIES_FLAG) != 0 ) && + ( (flags & H5AC__FLUSH_INVALIDATE_FLAG) == 0 ) ) { + + init_flush_flags |= H5AC__FLUSH_MARKED_ENTRIES_FLAG; + } + + aux_ptr->write_permitted = TRUE; + + status = H5C_flush_cache(f, + H5AC_noblock_dxpl_id, + H5AC_noblock_dxpl_id, + f->shared->cache, + init_flush_flags); + + aux_ptr->write_permitted = FALSE; + + if ( status < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + } + } /* end if ( aux_ptr->mpi_rank == 0 ) */ + + status = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + FALSE); + } /* end if ( aux_ptr != NULL ) */ +#endif /* H5_HAVE_PARALLEL */ + + status = H5C_flush_cache(f, + dxpl_id, + H5AC_noblock_dxpl_id, + f->shared->cache, flags); if ( status < 0 ) { @@ -616,8 +1234,16 @@ done: * moving management of the dirty flag on cache entries into * the cache code. * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * *------------------------------------------------------------------------- */ + herr_t H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned int flags) { @@ -625,6 +1251,9 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * H5AC_info_t *info; H5AC_t *cache; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_set, FAIL) @@ -717,6 +1346,23 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) { + + result = H5AC_log_inserted_entry(f, + f->shared->cache, + (H5AC_info_t *)thing, + type, + addr); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "H5AC_log_inserted_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_insert_entry(f, dxpl_id, H5AC_noblock_dxpl_id, @@ -731,6 +1377,22 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C_insert_entry() failed") } +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { + + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -766,6 +1428,18 @@ done: * in H5C.c, and then re-wrote the function as a wrapper for * H5C_rename_entry(). * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * + * Note that this code presumes that the renamed entry will + * be present in all caches -- which it must be at present. + * To maintain this invarient, only rename entries immediately + * after you unprotect them. + * *------------------------------------------------------------------------- */ herr_t @@ -773,6 +1447,9 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad { herr_t result; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_rename, FAIL) @@ -808,6 +1485,13 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad * In any case, don't check this code in without revisiting this * issue. * JRM -- 6/6/05 + * + * On reflection, the code was already broken, as there was no + * way to advise the SAP that a renamed entry had changed its + * address, or was dirty. I will not worry about it for now, + * but the matter must be addressed if we ever get serious + * about FPHDF5. + * JRM -- 7/5/05 */ HGOTO_DONE(SUCCEED); @@ -816,6 +1500,21 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) { + + result = H5AC_log_renamed_entry(f->shared->cache, + old_addr, + new_addr); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_renamed_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_rename_entry(f->shared->cache, type, old_addr, @@ -827,6 +1526,22 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad "H5C_rename_entry() failed.") } +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { + + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -837,7 +1552,7 @@ done: /*------------------------------------------------------------------------- * Function: H5AC_protect * - * Purpose: If the target entry is not in the cache, load it. If + * Purpose: If the target entry is not in the cache, load it. If * necessary, attempt to evict one or more entries to keep * the cache within its maximum size. * @@ -845,8 +1560,8 @@ done: * to the caller. The caller must call H5AC_unprotect() when * finished with the entry. * - * While it is protected, the entry may not be either evicted - * or flushed -- nor may it be accessed by another call to + * While it is protected, the entry may not be either evicted + * or flushed -- nor may it be accessed by another call to * H5AC_protect. Any attempt to do so will result in a failure. * * This comment is a re-write of the original Purpose: section. @@ -887,8 +1602,8 @@ done: * Purpose section above. * * JRM - 6/7/04 - * Abstracted the guts of the function to H5C_protect() - * in H5C.c, and then re-wrote the function as a wrapper for + * Abstracted the guts of the function to H5C_protect() + * in H5C.c, and then re-wrote the function as a wrapper for * H5C_protect(). * *------------------------------------------------------------------------- @@ -1108,6 +1823,18 @@ done: * part of a collection of changes directed at moving * management of cache entry dirty flags into the H5C code. * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * + * JRM - 9/8/05 + * Added code to track entry size changes. This is necessary + * as it can effect dirty byte creation counts, thereby + * throwing the caches out of sync in the PHDF5 case. + * *------------------------------------------------------------------------- */ herr_t @@ -1116,6 +1843,12 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, { herr_t result; herr_t ret_value=SUCCEED; /* Return value */ + hbool_t size_changed = FALSE; + hbool_t dirtied; + size_t new_size = 0; +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_unprotect, FAIL) @@ -1129,6 +1862,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, HDassert( ((H5AC_info_t *)thing)->addr == addr ); HDassert( ((H5AC_info_t *)thing)->type == type ); + dirtied = ((flags & H5AC__DIRTIED_FLAG) == H5AC__DIRTIED_FLAG ); + + if ( dirtied ) { + + if ( (type->size)(f, thing, &new_size) < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ + "Can't get size of thing") + } + + if ( ((H5AC_info_t *)thing)->size != new_size ) { + + size_changed = TRUE; + flags = flags | H5AC__SIZE_CHANGED_FLAG; + } + } + #ifdef H5_HAVE_PARALLEL #ifdef H5_HAVE_FPHDF5 /* The following code to support flexible parallel is a direct copy @@ -1202,6 +1952,40 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) && + ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) { + + result = H5AC_log_dirtied_entry(f->shared->cache, + (H5AC_info_t *)thing, + addr, + size_changed, + new_size); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_dirtied_entry() failed.") + } + } + + if ( ( (flags & H5C__DELETED_FLAG) != 0 ) && + ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) && + ( aux_ptr->mpi_rank == 0 ) ) { + + result = H5AC_log_deleted_entry(f->shared->cache, + (H5AC_info_t *)thing, + addr, + flags); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_deleted_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_unprotect(f, dxpl_id, H5AC_noblock_dxpl_id, @@ -1209,7 +1993,8 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, type, addr, thing, - flags); + flags, + new_size); if ( result < 0 ) { @@ -1217,11 +2002,28 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, "H5C_unprotect() failed.") } -done: +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { - FUNC_LEAVE_NOAPI(ret_value) + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); -} /* H5AC_unprotect() */ + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_unprotect() */ /*------------------------------------------------------------------------- @@ -1686,6 +2488,184 @@ done: /*------------------------------------------------------------------------- * + * Function: H5AC_broadcast_clean_list() + * + * Purpose: Broadcast the contents of the process 0 cleaned entry + * slist. In passing, also remove all entries from said + * list, and also remove any matching entries from the dirtied + * slist. + * + * This function must only be called by the process with + * MPI_rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/1/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_broadcast_clean_list(H5AC_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + haddr_t addr; + H5AC_aux_t * aux_ptr = NULL; + H5SL_node_t * slist_node_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + MPI_Offset * buf_ptr = NULL; + size_t buf_size; + int i = 0; + int mpi_result; + int num_entries; + + FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + HDassert( H5SL_count(aux_ptr->c_slist_ptr) == + (size_t)(aux_ptr->c_slist_len) ); + + + /* First broadcast the number of entries in the list so that the + * receives can set up a buffer to receive them. If there aren't + * any, we are done. + */ + num_entries = aux_ptr->c_slist_len; + + mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) + + } + + if ( num_entries > 0 ) + { + /* allocate a buffer to store the list of entry base addresses in */ + + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + + buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size); + + if ( buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for clean entry buffer") + } + + /* now load the entry base addresses into the buffer, emptying the + * cleaned entry list in passing + */ + + while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) ) + { + slist_entry_ptr = H5SL_item(slist_node_ptr); + + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + + HDassert( i < num_entries ); + + addr = slist_entry_ptr->addr; + + if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) { + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ + "can't convert from haddr to MPI off") + } + + i++; + + /* now remove the entry from the cleaned entry list */ + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + + /* and also remove the matching entry from the dirtied list + * if it exists. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); + } + + } /* while */ + + + /* Now broadcast the list of cleaned entries -- if there is one. + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + + mpi_result = MPI_Bcast((void *)buf_ptr, (int)buf_size, MPI_BYTE, 0, + aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) + } + } + +done: + + if ( buf_ptr != NULL ) { + + buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_broadcast_clean_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * * Function: H5AC_check_if_write_permitted * * Purpose: Determine if a write is permitted under the current @@ -1703,6 +2683,16 @@ done: * * Modifications: * + * John Mainzer, 9/23/05 + * Rewrote function to return the value of the + * write_permitted field in aux structure if the structure + * exists and mpi_rank is 0. + * + * If the aux structure exists, but mpi_rank isn't 0, the + * function now returns FALSE. + * + * In all other cases, the function returns TRUE. + * *------------------------------------------------------------------------- */ @@ -1720,49 +2710,1087 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f, { hbool_t write_permitted = TRUE; herr_t ret_value = SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ + FUNC_ENTER_NOAPI(H5AC_check_if_write_permitted, FAIL) #ifdef H5_HAVE_PARALLEL + HDassert( f != NULL ); + HDassert( f->shared != NULL ); + HDassert( f->shared->cache != NULL ); - if ( IS_H5FD_MPI(f) ) { + aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr); + + if ( aux_ptr != NULL ) { + + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + if ( aux_ptr->mpi_rank == 0 ) { + + write_permitted = aux_ptr->write_permitted; + + } else { + + write_permitted = FALSE; + } + } +#endif /* H5_HAVE_PARALLEL */ + + *write_permitted_ptr = write_permitted; + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_check_if_write_permitted() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_deleted_entry() + * + * Purpose: Log an entry for which H5C__DELETED_FLAG has been set. + * + * If mpi_rank is 0, we must make sure that the entry doesn't + * appear in the cleaned or dirty entry lists. Otherwise, + * we have nothing to do. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_deleted_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + unsigned int flags) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; - H5P_genplist_t *dxpl; /* Dataset transfer property list */ - H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode property value */ + FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL) - /* Get the dataset transfer property list */ - if ( NULL == (dxpl = H5I_object(dxpl_id)) ) { + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \ - "not a dataset creation property list") + aux_ptr = cache_ptr->aux_ptr; + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + + HDassert( (flags & H5C__DELETED_FLAG) != 0 ); + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + /* if the entry appears in the dirtied entry slist, remove it. */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); } - /* Get the transfer mode property */ - if( H5P_get(dxpl, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0 ) { + /* if the entry appears in the cleaned entry slist, remove it. */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, \ - "can't retrieve xfer mode") + aux_ptr->c_slist_len -= 1; + HDassert( aux_ptr->c_slist_len >= 0 ); } + } + +done: - if ( xfer_mode == H5FD_MPIO_INDEPENDENT ) { + FUNC_LEAVE_NOAPI(ret_value) - write_permitted = FALSE; +} /* H5AC_log_deleted_entry() */ +#endif /* H5_HAVE_PARALLEL */ - } else { + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_dirtied_entry() + * + * Purpose: Update the dirty_bytes count for a newly dirtied entry. + * + * If mpi_rank isnt 0, this simply means adding the size + * of the entries to the dirty_bytes count. + * + * If mpi_rank is 0, we must first check to see if the entry + * appears in the dirty entries slist. If it is, do nothing. + * If it isn't, add the size to th dirty_bytes count, add the + * entry to the dirty entries slist, and remove it from the + * cleaned list (if it is present there). + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_dirtied_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + hbool_t size_changed, + size_t new_size) +{ + herr_t ret_value = SUCCEED; /* Return value */ + size_t entry_size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_dirtied_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + HDassert( entry_ptr->is_dirty == FALSE ); + + if ( size_changed ) { + + entry_size = new_size; + + } else { + + entry_size = entry_ptr->size; + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + aux_ptr->dirty_bytes += entry_size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) { + + /* the entry is dirty. If it exists on the cleaned entries list, + * remove it. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { - HDassert(xfer_mode == H5FD_MPIO_COLLECTIVE ); + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from clean entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } } + } else { + + aux_ptr->dirty_bytes += entry_size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ } +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_dirtied_entry() */ #endif /* H5_HAVE_PARALLEL */ - *write_permitted_ptr = write_permitted; + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_flushed_entry() + * + * Purpose: Update the clean entry slist for the flush of an entry -- + * specifically, if the entry has been cleared, remove it + * from both the cleaned and dirtied lists if it is present. + * Otherwise, if the entry was dirty, insert the indicated + * entry address in the clean slist if it isn't there already. + * + * This function is only used in PHDF5, and should only + * be called for the process with mpi rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +#if 0 /* This is useful debugging code. -- JRM */ +static herr_t +H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL) + + aux_ptr = cache_ptr->aux_ptr; + if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) { + + HDfprintf(stdout, + "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n", + (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id); + } done: FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC_check_if_write_permitted() */ +} /* H5AC_log_flushed_entry_dummy() */ +#endif /* JRM */ + +static herr_t +H5AC_log_flushed_entry(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + UNUSED int type_id) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t cleared; + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + + FUNC_ENTER_NOAPI(H5AC_log_flushed_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + cleared = ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0 ); + + if ( cleared ) { + + /* If the entry has been cleared, must remove it from both the + * cleaned list and the dirtied list. + */ + + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from clean entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } + + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); + } + } else if ( was_dirty ) { + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the clean entry list. */ + + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate clean slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into clean entry slist.") + } + + aux_ptr->c_slist_len += 1; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_flushed_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_inserted_entry() + * + * Purpose: Update the dirty_bytes count for a newly inserted entry. + * + * If mpi_rank isnt 0, this simply means adding the size + * of the entry to the dirty_bytes count. + * + * If mpi_rank is 0, we must also add the entry to the + * dirty entries slist. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_inserted_entry(H5F_t * f, + H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + const H5AC_class_t * type, + haddr_t addr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + size_t size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + HDassert( entry_ptr->type == type ); + + /* the size field of the entry will not have been set yet, so we + * have to obtain it directly. + */ + if ( (type->size)(f, (void *)entry_ptr, &size) < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ + "Can't get size of entry to be inserted.") + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + + } else { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Inserted entry already in dirty slist.") + } + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Inserted entry in clean slist.") + } + } + + aux_ptr->dirty_bytes += size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->insert_dirty_bytes += size; + aux_ptr->insert_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_inserted_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_renamed_entry() + * + * Purpose: Update the dirty_bytes count for a renamed entry. + * + * WARNING + * + * At present, the way that the rename call is used ensures + * that the renamed entry is present in all caches by + * renaming in a collective operation and immediately after + * unprotecting the target entry. + * + * This function uses this invarient, and will cause arcane + * failures if it is not met. If maintaining this invarient + * becomes impossible, we will have to rework this function + * extensively, and likely include a bit of IPC for + * synchronization. A better option might be to subsume + * rename in the unprotect operation. + * + * Given that the target entry is in all caches, the function + * proceeds as follows: + * + * For processes with mpi rank other 0, it simply checks to + * see if the entry was dirty prior to the rename, and adds + * the entries size to the dirty bytes count. + * + * In the process with mpi rank 0, the function first checks + * to see if the entry was dirty prior to the rename. If it + * was, and if the entry doesn't appear in the dirtied list + * under its old address, it adds the entry's size to the + * dirty bytes count. + * + * The rank 0 process then removes any references to the + * entry under its old address from the cleands and dirtied + * lists, and inserts an entry in the dirtied list under the + * new address. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_renamed_entry(H5AC_t * cache_ptr, + haddr_t old_addr, + haddr_t new_addr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t entry_in_cache; + hbool_t entry_dirty; + size_t entry_size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_renamed_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + /* get entry status, size, etc here */ + if ( H5C_get_entry_status(cache_ptr, old_addr, &entry_size, &entry_in_cache, + &entry_dirty, NULL) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.") + + } else if ( ! entry_in_cache ) { + + HDassert( entry_in_cache ); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.") + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + /* if the entry appears in the cleaned entry slist, under its old + * address, remove it. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&old_addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == old_addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } + + /* if the entry appears in the dirtied entry slist under its old + * address, remove it, but don't free it. Set addr to new_addr. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&old_addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == old_addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->addr = new_addr; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); + + } else { + + /* otherwise, allocate a new entry that is ready + * for insertion, and increment dirty_bytes. + * + * Note that the fact that the entry wasn't in the dirtied + * list under its old address implies that it must have + * been clean to start with. + */ + + HDassert( !entry_dirty ); + + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = new_addr; + + aux_ptr->dirty_bytes += entry_size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->rename_dirty_bytes += entry_size; + aux_ptr->rename_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } + + /* verify that there is no entry at new_addr in the dirty slist */ + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&new_addr)) != NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "dirty slist already contains entry at new_addr.") + } + + /* insert / reinsert the entry in the dirty slist */ + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + + } else if ( ! entry_dirty ) { + + aux_ptr->dirty_bytes += entry_size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->rename_dirty_bytes += entry_size; + aux_ptr->rename_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_renamed_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * Function: H5AC_propagate_flushed_and_still_clean_entries_list + * + * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed + * to write to file. All other metadata caches on processes + * with rank greater than 0 must retain dirty entries until + * they are notified that the entry is now clean. + * + * This function is the main routine for that proceedure. + * It must be called simultaniously on all processes that + * have the relevant file open. To this end, there must + * be a barrier immediately prior to this call. + * + * Typicaly, this will be done one of two ways: + * + * 1) Dirty byte creation exceeds some user specified value. + * + * While metadata reads may occur independently, all + * operations writing metadata must be collective. Thus + * all metadata caches see the same sequence of operations, + * and therefore the same dirty data creation. + * + * This fact is used to synchronize the caches for purposes + * of propagating the list of flushed and still clean + * entries, by simply calling this function from all + * caches whenever some user specified threshold on dirty + * data is exceeded. + * + * 2) Under direct user control -- this operation must be + * collective. + * + * The operations to be managed by this function are as + * follows: + * + * For the process with mpi rank 0: + * + * 1) Enable writes, flush the cache to its min clean size, + * and then disable writes again. + * + * 2) Load the contents of the flushed and still clean entries + * list (c_slist_ptr) into a buffer, and broadcast that + * buffer to all the other caches. + * + * 3) Clear the flushed and still clean entries list + * (c_slist_ptr). + * + * + * For all processes with mpi rank greater than 0: + * + * 1) Receive the flushed and still clean entries list broadcast + * + * 2) Mark the specified entries as clean. + * + * + * For all processes: + * + * 1) Reset the dirtied bytes count to 0. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * July 5, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr, + hbool_t do_barrier) +{ + herr_t ret_value = SUCCEED; /* Return value */ + herr_t result; + int mpi_code; + H5AC_aux_t * aux_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + HDfprintf(stdout, + "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", + (int)(aux_ptr->mpi_rank), + (int)(aux_ptr->dirty_bytes_propagations), + (int)(aux_ptr->unprotect_dirty_bytes), + (int)(aux_ptr->unprotect_dirty_bytes_updates), + (int)(aux_ptr->insert_dirty_bytes), + (int)(aux_ptr->insert_dirty_bytes_updates), + (int)(aux_ptr->rename_dirty_bytes), + (int)(aux_ptr->rename_dirty_bytes_updates)); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + if ( do_barrier ) { + + /* to prevent "messages from the future" we must synchronize all + * processes before we start the flush. This synchronization may + * already be done -- hence the do_barrier parameter. + */ + + if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) + } + } + + if ( aux_ptr->mpi_rank == 0 ) { + + aux_ptr->write_permitted = TRUE; + + result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id, + cache_ptr); + + aux_ptr->write_permitted = FALSE; + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "H5C_flush_to_min_clean() failed.") + } + + if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't broadcast clean slist.") + } + + HDassert( aux_ptr->c_slist_len == 0 ); + + } else { + + if ( H5AC_receive_and_apply_clean_list(f, dxpl_id, + H5AC_noblock_dxpl_id, + cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't receive and/or process clean slist broadcast.") + } + } + + aux_ptr->dirty_bytes = 0; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations += 1; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_propagate_flushed_and_still_clean_entries_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_receive_and_apply_clean_list() + * + * Purpose: Receive the list of cleaned entries from process 0, + * and mark the specified entries as clean. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/4/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_receive_and_apply_clean_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5AC_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + haddr_t * haddr_buf_ptr = NULL; + MPI_Offset * MPI_Offset_buf_ptr = NULL; + size_t buf_size; + int i = 0; + int mpi_result; + int num_entries; + + FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank != 0 ); + + /* First receive the number of entries in the list so that we + * can set up a buffer to receive them. If there aren't + * any, we are done. + */ + mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) + } + + if ( num_entries > 0 ) + { + /* allocate a buffers to store the list of entry base addresses in */ + + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size); + + if ( MPI_Offset_buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for receive buffer") + } + + haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * + (size_t)num_entries); + + if ( haddr_buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for haddr buffer") + } + + + /* Now receive the list of cleaned entries + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + + mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, + MPI_BYTE, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) + } + + + /* translate the MPI_Offsets to haddr_t */ + i = 0; + while ( i < num_entries ) + { + haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]); + + if ( haddr_buf_ptr[i] == HADDR_UNDEF ) { + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ + "can't convert MPI off to haddr") + } + + i++; + } + + + /* mark the indicated entries as clean */ + if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id, + cache_ptr, (int32_t)num_entries, + &(haddr_buf_ptr[0])) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't mark entries clean.") + + } + } + +done: + + if ( MPI_Offset_buf_ptr != NULL ) { + + MPI_Offset_buf_ptr = + (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); + } + + if ( haddr_buf_ptr != NULL ) { + + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_receive_and_apply_clean_list() */ +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h index 450907f..3cbe62e 100644 --- a/src/H5ACprivate.h +++ b/src/H5ACprivate.h @@ -182,7 +182,7 @@ extern hid_t H5AC_ind_dxpl_id; /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, \ /* hbool_t rpt_fcn_enabled = */ FALSE, \ /* hbool_t set_initial_size = */ TRUE, \ - /* size_t initial_size = */ (1 * 1024 * 1024), \ + /* size_t initial_size = */ ( 1 * 1024 * 1024), \ /* double min_clean_fraction = */ 0.25, \ /* size_t max_size = */ (16 * 1024 * 1024), \ /* size_t min_size = */ ( 1 * 1024 * 1024), \ @@ -216,6 +216,7 @@ extern hid_t H5AC_ind_dxpl_id; #define H5AC__SET_FLUSH_MARKER_FLAG H5C__SET_FLUSH_MARKER_FLAG #define H5AC__DELETED_FLAG H5C__DELETED_FLAG #define H5AC__DIRTIED_FLAG H5C__DIRTIED_FLAG +#define H5AC__SIZE_CHANGED_FLAG H5C__SIZE_CHANGED_FLAG #define H5AC__FLUSH_INVALIDATE_FLAG H5C__FLUSH_INVALIDATE_FLAG #define H5AC__FLUSH_CLEAR_ONLY_FLAG H5C__FLUSH_CLEAR_ONLY_FLAG #define H5AC__FLUSH_MARKED_ENTRIES_FLAG H5C__FLUSH_MARKED_ENTRIES_FLAG @@ -235,6 +236,7 @@ H5_DLL herr_t H5AC_unprotect(H5F_t *f, hid_t dxpl_id, H5_DLL herr_t H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags); H5_DLL herr_t H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_addr); + H5_DLL herr_t H5AC_dest(H5F_t *f, hid_t dxpl_id); H5_DLL herr_t H5AC_stats(const H5F_t *f); diff --git a/src/H5C.c b/src/H5C.c index afad50b..8ac73e7 100644 --- a/src/H5C.c +++ b/src/H5C.c @@ -200,6 +200,12 @@ * caused compiler warnings. * JRM - 1/10/05 * + * - Added the H5C__DLL_UPDATE_FOR_SIZE_CHANGE macro and the associated + * sanity checking macros. These macro are used to update the size of + * a DLL when one of its entries changes size. + * + * JRM - 9/8/05 + * ****************************************************************************/ #if H5C_DO_SANITY_CHECKS @@ -267,11 +273,29 @@ if ( ( (entry_ptr) == NULL ) || \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \ } +#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ +if ( ( (dll_len) <= 0 ) || \ + ( (dll_size) <= 0 ) || \ + ( (old_size) <= 0 ) || \ + ( (old_size) > (dll_size) ) || \ + ( (new_size) <= 0 ) || \ + ( ( (dll_len) == 1 ) && ( (old_size) != (dll_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL pre size update SC failed") \ +} + +#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ +if ( ( (new_size) > (dll_size) ) || \ + ( ( (dll_len) == 1 ) && ( (new_size) != (dll_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL post size update SC failed") \ +} + #else /* H5C_DO_SANITY_CHECKS */ #define H5C__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) #define H5C__DLL_SC(head_ptr, tail_ptr, len, Size, fv) #define H5C__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) +#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) +#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) #endif /* H5C_DO_SANITY_CHECKS */ @@ -344,6 +368,11 @@ if ( ( (entry_ptr) == NULL ) || \ (Size) -= entry_ptr->size; \ } +#define H5C__DLL_UPDATE_FOR_SIZE_CHANGE(dll_len, dll_size, old_size, new_size) \ + H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ + (dll_size) -= (old_size); \ + (dll_size) += (new_size); \ + H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) #if H5C_DO_SANITY_CHECKS @@ -534,6 +563,19 @@ if ( ( (entry_ptr) == NULL ) || \ #define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr) \ (((cache_ptr)->renames)[(entry_ptr)->type->id])++; +#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size)\ + if ( (entry_ptr)->size < (new_size) ) { \ + ((cache_ptr)->size_increases[(entry_ptr)->type->id])++; \ + if ( (cache_ptr)->index_size > (cache_ptr)->max_index_size ) \ + (cache_ptr)->max_index_size = (cache_ptr)->index_size; \ + if ( (cache_ptr)->slist_size > (cache_ptr)->max_slist_size ) \ + (cache_ptr)->max_slist_size = (cache_ptr)->slist_size; \ + if ( (cache_ptr)->pl_size > (cache_ptr)->max_pl_size ) \ + (cache_ptr)->max_pl_size = (cache_ptr)->pl_size; \ + } else { \ + ((cache_ptr)->size_decreases[(entry_ptr)->type->id])++; \ + } + #define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr) \ (cache_ptr)->total_ht_insertions++; @@ -646,6 +688,7 @@ if ( ( (entry_ptr) == NULL ) || \ #define H5C__RESET_CACHE_ENTRY_STATS(entry_ptr) #define H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr) #define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr) +#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size) #define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr) #define H5C__UPDATE_STATS_FOR_HT_DELETION(cache_ptr) #define H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, success, depth) @@ -748,9 +791,32 @@ if ( ( (cache_ptr) == NULL ) || \ ( ((cache_ptr)->index)[k] != (entry_ptr) ) || \ ( (entry_ptr)->ht_prev != NULL ) ) { \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ - "Post HT shift to front SC failed") \ + "Post HT shift to front SC failed") \ +} + +#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->index_len <= 0 ) || \ + ( (cache_ptr)->index_size <= 0 ) || \ + ( (new_size) <= 0 ) || \ + ( (old_size) > (cache_ptr)->index_size ) || \ + ( (new_size) <= 0 ) || \ + ( ( (cache_ptr)->index_len == 1 ) && \ + ( (cache_ptr)->index_size != (old_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Pre HT entry size change SC failed") \ } +#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->index_len <= 0 ) || \ + ( (cache_ptr)->index_size <= 0 ) || \ + ( (new_size) > (cache_ptr)->index_size ) || \ + ( ( (cache_ptr)->index_len == 1 ) && \ + ( (cache_ptr)->index_size != (new_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Post HT entry size change SC failed") \ +} #else /* H5C_DO_SANITY_CHECKS */ @@ -759,6 +825,8 @@ if ( ( (cache_ptr) == NULL ) || \ #define H5C__PRE_HT_SEARCH_SC(cache_ptr, Addr, fail_val) #define H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, Addr, k, fail_val) #define H5C__POST_HT_SHIFT_TO_FRONT(cache_ptr, entry_ptr, k, fail_val) +#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) +#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) #endif /* H5C_DO_SANITY_CHECKS */ @@ -840,6 +908,14 @@ if ( ( (cache_ptr) == NULL ) || \ H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, (entry_ptr != NULL), depth) \ } +#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \ +{ \ + H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ + (cache_ptr)->index_size -= old_size; \ + (cache_ptr)->index_size += new_size; \ + H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +} + /************************************************************************** * @@ -896,7 +972,7 @@ if ( ( (cache_ptr) == NULL ) || \ HDassert( H5F_addr_defined((entry_ptr)->addr) ); \ HDassert( !((entry_ptr)->in_slist) ); \ \ - if ( H5SL_insert((cache_ptr)->slist_ptr, &(entry_ptr)->addr, entry_ptr) \ + if ( H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) \ < 0 ) \ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, \ "Can't insert entry in skip list") \ @@ -963,6 +1039,44 @@ if ( ( (cache_ptr) == NULL ) || \ } /* H5C__REMOVE_ENTRY_FROM_SLIST */ +/*------------------------------------------------------------------------- + * + * Function: H5C__UPDATE_SLIST_FOR_SIZE_CHANGE + * + * Purpose: Update cache_ptr->slist_size for a change in the size of + * and entry in the slist. + * + * Return: N/A + * + * Programmer: John Mainzer, 9/07/05 + * + * Modifications: + * + * None. + * + *------------------------------------------------------------------------- + */ + +#define H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \ +{ \ + HDassert( (cache_ptr) ); \ + HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \ + HDassert( (old_size) > 0 ); \ + HDassert( (new_size) > 0 ); \ + HDassert( (old_size) <= (cache_ptr)->slist_size ); \ + HDassert( (cache_ptr)->slist_len > 0 ); \ + HDassert( ((cache_ptr)->slist_len > 1) || \ + ( (cache_ptr)->slist_size == (old_size) ) ); \ + \ + (cache_ptr)->slist_size -= (old_size); \ + (cache_ptr)->slist_size += (new_size); \ + \ + HDassert( (new_size) <= (cache_ptr)->slist_size ); \ + HDassert( ( (cache_ptr)->slist_len > 1 ) || \ + ( (cache_ptr)->slist_size == (new_size) ) ); \ +} /* H5C__REMOVE_ENTRY_FROM_SLIST */ + + /************************************************************************** * * Replacement policy update macros: @@ -1732,6 +1846,11 @@ static herr_t H5C_make_space_in_cache(H5F_t * f, size_t space_needed, hbool_t write_permitted, hbool_t * first_flush_ptr); +#if H5C_DO_EXTREME_SANITY_CHECKS +static herr_t H5C_validate_lru_list(H5C_t * cache_ptr); +static herr_t H5C_verify_not_in_index(H5C_t * cache_ptr, + H5C_cache_entry_t * entry_ptr); +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ /**************************************************************************** @@ -1874,8 +1993,8 @@ done: * * The check_write_permitted parameter must either be NULL, * or point to a function of type H5C_write_permitted_func_t. - * If it is NULL, the cache will presume that writes are - * always permitted. + * If it is NULL, the cache will use the write_permitted + * flag to determine whether writes are permitted. * * Return: Success: Pointer to the new instance. * @@ -1901,6 +2020,16 @@ done: * Added/updated initialization for the automatic cache * size control data structures. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of + * the H5C_t structure. + * + * JRM -- 7/5/05 + * Added the new log_flush parameter and supporting code. + * + * JRM -- 9/21/05 + * Added the new aux_ptr parameter and supporting code. + * *------------------------------------------------------------------------- */ @@ -1909,7 +2038,10 @@ H5C_create(size_t max_cache_size, size_t min_clean_size, int max_type_id, const char * (* type_name_table_ptr), - H5C_write_permitted_func_t check_write_permitted) + H5C_write_permitted_func_t check_write_permitted, + hbool_t write_permitted, + H5C_log_flush_func_t log_flush, + void * aux_ptr) { int i; H5C_t * cache_ptr = NULL; @@ -1925,6 +2057,8 @@ H5C_create(size_t max_cache_size, HDassert( max_type_id < H5C__MAX_NUM_TYPE_IDS ); HDassert( type_name_table_ptr ); + HDassert( ( write_permitted == TRUE ) || ( write_permitted == FALSE ) ); + for ( i = 0; i <= max_type_id; i++ ) { HDassert( (type_name_table_ptr)[i] ); @@ -1950,6 +2084,8 @@ H5C_create(size_t max_cache_size, cache_ptr->magic = H5C__H5C_T_MAGIC; + cache_ptr->aux_ptr = aux_ptr; + cache_ptr->max_type_id = max_type_id; cache_ptr->type_name_table_ptr = type_name_table_ptr; @@ -1957,6 +2093,9 @@ H5C_create(size_t max_cache_size, cache_ptr->min_clean_size = min_clean_size; cache_ptr->check_write_permitted = check_write_permitted; + cache_ptr->write_permitted = write_permitted; + + cache_ptr->log_flush = log_flush; cache_ptr->index_len = 0; cache_ptr->index_size = (size_t)0; @@ -2426,11 +2565,13 @@ H5C_flush_cache(H5F_t * f, hbool_t first_flush = TRUE; int32_t protected_entries = 0; int32_t i; - H5SL_node_t * node_ptr; - H5C_cache_entry_t * entry_ptr; + H5SL_node_t * node_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; #if H5C_DO_SANITY_CHECKS int32_t actual_slist_len = 0; + int32_t initial_slist_len = 0; size_t actual_slist_size = 0; + size_t initial_slist_size = 0; #endif /* H5C_DO_SANITY_CHECKS */ FUNC_ENTER_NOAPI(H5C_flush_cache, FAIL) @@ -2468,12 +2609,28 @@ H5C_flush_cache(H5F_t * f, } else { node_ptr = H5SL_first(cache_ptr->slist_ptr); + +#if H5C_DO_SANITY_CHECKS + /* H5C_flush_single_entry() now removes dirty entries from the + * slist as it flushes them. Thus for sanity checks we must + * make note of the initial slist length and size before we + * do any flushes. + */ + initial_slist_len = cache_ptr->slist_len; + initial_slist_size = cache_ptr->slist_size; +#endif /* H5C_DO_SANITY_CHECKS */ + } while ( node_ptr != NULL ) { entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + /* increment node pointer now, before we delete its target + * from the slist. + */ + node_ptr = H5SL_next(node_ptr); + HDassert( entry_ptr != NULL ); HDassert( entry_ptr->in_slist ); @@ -2512,14 +2669,22 @@ H5C_flush_cache(H5F_t * f, } } } - - node_ptr = H5SL_next(node_ptr); - } /* while */ #if H5C_DO_SANITY_CHECKS - HDassert( actual_slist_len == cache_ptr->slist_len ); - HDassert( actual_slist_size == cache_ptr->slist_size ); + HDassert( actual_slist_len == initial_slist_len ); + HDassert( actual_slist_size == initial_slist_size ); + + if ( (flags & H5C__FLUSH_INVALIDATE_FLAG) != 0 ) { + + HDassert( cache_ptr->slist_len == initial_slist_len ); + HDassert( cache_ptr->slist_size == initial_slist_size ); + + } else if ( ! flush_marked_entries ) { + + HDassert( cache_ptr->slist_len == 0 ); + HDassert( cache_ptr->slist_size == 0 ); + } #endif /* H5C_DO_SANITY_CHECKS */ if ( destroy ) { @@ -2625,6 +2790,95 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_flush_to_min_clean + * + * Purpose: Flush dirty entries until the caches min clean size is + * attained. + * + * This function is used in the implementation of the + * metadata cache in PHDF5. To avoid "messages from the + * future", the cache on process 0 can't be allowed to + * flush entries until the other processes have reached + * the same point in the calculation. If this constraint + * is not met, it is possible that the other processes will + * read metadata generated at a future point in the + * computation. + * + * + * Return: Non-negative on success/Negative on failure or if + * write is not permitted. + * + * Programmer: John Mainzer + * 9/16/05 + * + * Modifications: + * + * None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_flush_to_min_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr) +{ + herr_t result; + herr_t ret_value = SUCCEED; + hbool_t first_flush = TRUE; + hbool_t write_permitted; + + FUNC_ENTER_NOAPI(H5C_flush_to_min_clean, FAIL) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->skip_file_checks || f ); + + if ( cache_ptr->check_write_permitted != NULL ) { + + result = (cache_ptr->check_write_permitted)(f, + primary_dxpl_id, + &write_permitted); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't get write_permitted") + } + } else { + + write_permitted = cache_ptr->write_permitted; + } + + if ( ! write_permitted ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "cache write is not permitted!?!\n"); + } + + + result = H5C_make_space_in_cache(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + 0, + write_permitted, + &first_flush); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "H5C_make_space_in_cache failed.") + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_flush_to_min_clean() */ + + +/*------------------------------------------------------------------------- * Function: H5C_get_cache_auto_resize_config * * Purpose: Copy the current configuration of the cache automatic @@ -2798,6 +3052,93 @@ done: /*------------------------------------------------------------------------- + * + * Function: H5C_get_entry_status + * + * Purpose: This function is used to determine whether the cache + * contains an entry with the specified base address. If + * the entry exists, it also reports some status information + * on the entry. + * + * Status information is reported in the locations pointed + * to by the size_ptr, in_cache_ptr, is_dirty_ptr, and + * is_protected_ptr. While in_cache_ptr must be defined, + * the remaining pointers may be NULL, in which case the + * associated data is not reported. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 7/1/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +herr_t +H5C_get_entry_status(H5C_t * cache_ptr, + haddr_t addr, + size_t * size_ptr, + hbool_t * in_cache_ptr, + hbool_t * is_dirty_ptr, + hbool_t * is_protected_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5C_get_entry_status, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( H5F_addr_defined(addr) ); + HDassert( in_cache_ptr != NULL ); + + /* this test duplicates tow of the above asserts, but we need an + * invocation of HGOTO_ERROR to keep the compiler happy. + */ + if ( ( cache_ptr == NULL ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr on entry.") + } + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + + if ( entry_ptr == NULL ) { + + /* the entry doesn't exist in the cache -- report this + * and quit. + */ + *in_cache_ptr = FALSE; + + } else { + + *in_cache_ptr = TRUE; + + if ( size_ptr != NULL ) { + + *size_ptr = entry_ptr->size; + } + + if ( is_dirty_ptr != NULL ) { + + *is_dirty_ptr = entry_ptr->is_dirty; + } + + if ( is_protected_ptr != NULL ) { + + *is_protected_ptr = entry_ptr->is_protected; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_get_entry_status() */ + + +/*------------------------------------------------------------------------- * Function: H5C_insert_entry * * Purpose: Adds the specified thing to the cache. The thing need not @@ -2845,6 +3186,10 @@ done: * This is part of a set of changes moving management of the * is_dirty field of H5C_cache_entry_t into the H5C code. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of + * the H5C_t structure. + * *------------------------------------------------------------------------- */ @@ -2877,6 +3222,21 @@ H5C_insert_entry(H5F_t * f, HDassert( H5F_addr_defined(addr) ); HDassert( thing ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_verify_not_in_index(cache_ptr, (H5C_cache_entry_t *)thing) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "thing already in index.\n"); + } +#endif /* H5C_DO_SANITY_CHECKS */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); entry_ptr = (H5C_cache_entry_t *)thing; @@ -2897,6 +3257,10 @@ H5C_insert_entry(H5F_t * f, entry_ptr->in_slist = FALSE; +#ifdef H5_HAVE_PARALLEL + entry_ptr->clear_on_unprotect = FALSE; +#endif /* H5_HAVE_PARALLEL */ + entry_ptr->ht_next = NULL; entry_ptr->ht_prev = NULL; @@ -2925,6 +3289,9 @@ H5C_insert_entry(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ "Can't get write_permitted") } + } else { + + write_permitted = cache_ptr->write_permitted; } HDassert( entry_ptr->size <= H5C_MAX_ENTRY_SIZE ); @@ -3022,10 +3389,26 @@ H5C_insert_entry(H5F_t * f, H5C__UPDATE_RP_FOR_INSERTION(cache_ptr, entry_ptr, FAIL) +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr) done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_insert_entry() */ @@ -3033,6 +3416,171 @@ done: /*------------------------------------------------------------------------- * + * Function: H5C_mark_entries_as_clean + * + * Purpose: When the H5C code is used to implement the metadata caches + * in PHDF5, only the cache with MPI_rank 0 is allowed to + * actually write entries to disk -- all other caches must + * retain dirty entries until they are advised that the + * entries are clean. + * + * This function exists to allow the H5C code to receive these + * notifications. + * + * The function receives a list of entry base addresses + * which must refer to dirty entries in the cache. If any + * of the entries are either clean or don't exist, the + * function flags an error. + * + * The function scans the list of entries and flushes all + * those that are currently unprotected with the + * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently + * protected are flagged for clearing when they are + * unprotected. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 7/5/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +herr_t +H5C_mark_entries_as_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int32_t ce_array_len, + haddr_t * ce_array_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t first_flush = TRUE; + int i; + haddr_t addr; +#if H5C_DO_SANITY_CHECKS + haddr_t last_addr; +#endif /* H5C_DO_SANITY_CHECKS */ + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5C_mark_entries_as_clean, FAIL) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->skip_file_checks || f ); + + HDassert( ce_array_len > 0 ); + HDassert( ce_array_ptr != NULL ); + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + for ( i = 0; i < ce_array_len; i++ ) + { + addr = ce_array_ptr[i]; + +#if H5C_DO_SANITY_CHECKS + if ( i == 0 ) { + + last_addr = addr; + + } else { + + if ( last_addr == addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Duplicate entry in cleaned list.\n"); + + } else if ( last_addr > addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "cleaned list not sorted.\n"); + } + } + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ +#endif /* H5C_DO_SANITY_CHECKS */ + + HDassert( H5F_addr_defined(addr) ); + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + + if ( entry_ptr == NULL ) { +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n", + (int)i, + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not in cache?!?!?.") + + } else if ( ! entry_ptr->is_dirty ) { + +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n", + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not dirty?!?!?.") + + } else if ( entry_ptr->is_protected ) { + + entry_ptr->clear_on_unprotect = TRUE; + + } else { + + if ( H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + entry_ptr->type, + addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &first_flush, + TRUE) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } + } + } + +done: + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_mark_entries_as_clean() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * * Function: H5C_rename_entry * * Purpose: Use this function to notify the cache that an entry's @@ -3077,6 +3625,15 @@ H5C_rename_entry(H5C_t * cache_ptr, HDassert( H5F_addr_defined(new_addr) ); HDassert( H5F_addr_ne(old_addr, new_addr) ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + H5C__SEARCH_INDEX(cache_ptr, old_addr, entry_ptr, FAIL) if ( ( entry_ptr == NULL ) || ( entry_ptr->type != type ) ) @@ -3146,6 +3703,14 @@ H5C_rename_entry(H5C_t * cache_ptr, done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_rename_entry() */ @@ -3204,6 +3769,9 @@ done: * forces an immediate reduction in cache size. Modified * the code to deal with this eventuallity. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of H5C_t. + * *------------------------------------------------------------------------- */ @@ -3237,6 +3805,15 @@ H5C_protect(H5F_t * f, HDassert( type->load ); HDassert( H5F_addr_defined(addr) ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + /* first check to see if the target is in cache */ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL) @@ -3284,6 +3861,8 @@ H5C_protect(H5F_t * f, } } else { + write_permitted = cache_ptr->write_permitted; + have_write_permitted = TRUE; } @@ -3390,6 +3969,8 @@ H5C_protect(H5F_t * f, } } else { + write_permitted = cache_ptr->write_permitted; + have_write_permitted = TRUE; } } @@ -3439,6 +4020,15 @@ H5C_protect(H5F_t * f, done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_protect() */ @@ -3796,6 +4386,10 @@ done: * JRM -- 7/21/04 * Updated function for the addition of the hash table. * + * JRM -- 9/8/05 + * Updated function for the addition of cache entry size + * change statistics. + * *------------------------------------------------------------------------- */ @@ -3819,6 +4413,8 @@ H5C_stats(H5C_t * cache_ptr, int64_t total_flushes = 0; int64_t total_evictions = 0; int64_t total_renames = 0; + int64_t total_size_increases = 0; + int64_t total_size_decreases = 0; int32_t aggregate_max_accesses = 0; int32_t aggregate_min_accesses = 1000000; int32_t aggregate_max_clears = 0; @@ -3845,13 +4441,15 @@ H5C_stats(H5C_t * cache_ptr, for ( i = 0; i <= cache_ptr->max_type_id; i++ ) { - total_hits += cache_ptr->hits[i]; - total_misses += cache_ptr->misses[i]; - total_insertions += cache_ptr->insertions[i]; - total_clears += cache_ptr->clears[i]; - total_flushes += cache_ptr->flushes[i]; - total_evictions += cache_ptr->evictions[i]; - total_renames += cache_ptr->renames[i]; + total_hits += cache_ptr->hits[i]; + total_misses += cache_ptr->misses[i]; + total_insertions += cache_ptr->insertions[i]; + total_clears += cache_ptr->clears[i]; + total_flushes += cache_ptr->flushes[i]; + total_evictions += cache_ptr->evictions[i]; + total_renames += cache_ptr->renames[i]; + total_size_increases += cache_ptr->size_increases[i]; + total_size_decreases += cache_ptr->size_decreases[i]; #if H5C_COLLECT_CACHE_ENTRY_STATS if ( aggregate_max_accesses < cache_ptr->max_accesses[i] ) aggregate_max_accesses = cache_ptr->max_accesses[i]; @@ -3963,6 +4561,10 @@ H5C_stats(H5C_t * cache_ptr, (long)total_insertions, (long)total_renames); + HDfprintf(stdout, " Total entry size incrs / decrs = %ld / %ld\n", + (long)total_size_increases, + (long)total_size_decreases); + #if H5C_COLLECT_CACHE_ENTRY_STATS HDfprintf(stdout, " aggregate max / min accesses = %d / %d\n", @@ -4014,6 +4616,11 @@ H5C_stats(H5C_t * cache_ptr, (long)(cache_ptr->insertions[i]), (long)(cache_ptr->renames[i])); + HDfprintf(stdout, + " size increases / decreases = %ld / %ld\n", + (long)(cache_ptr->size_increases[i]), + (long)(cache_ptr->size_decreases[i])); + #if H5C_COLLECT_CACHE_ENTRY_STATS HDfprintf(stdout, @@ -4061,6 +4668,9 @@ done: * JRM - 7/21/04 * Updated for hash table related statistics. * + * JRM - 9/8/05 + * Updated for size increase / decrease statistics. + * *------------------------------------------------------------------------- */ @@ -4084,6 +4694,8 @@ H5C_stats__reset(H5C_t * cache_ptr) cache_ptr->flushes[i] = 0; cache_ptr->evictions[i] = 0; cache_ptr->renames[i] = 0; + cache_ptr->size_increases[i] = 0; + cache_ptr->size_decreases[i] = 0; } cache_ptr->total_ht_insertions = 0; @@ -4176,6 +4788,27 @@ H5C_stats__reset(H5C_t * cache_ptr) * field into the cache code. This has become necessary * to repair a cache coherency bug in PHDF5. * + * JRM -- 7/5/05 + * Added code supporting the new clear_on_unprotect field + * of H5C_cache_entry_t. This change is also part of the + * above mentioned cache coherency bug fix in PHDF5. + * + * JRM -- 9/8/05 + * Added the size_changed and new_size parameters and the + * supporting code. Since the metadata cache synchronizes + * on dirty bytes creation in the PHDF5 case, we must now + * track changes in entry size. + * + * Note that the new_size parameter is ignored unless the + * size_changed parameter is TRUE. In this case, the new_size + * must be positive. + * + * Also observe that if size_changed is TRUE, dirtied must be + * TRUE. + * + * JRM -- 9/23/05 + * Moved the size_changed parameter into flags. + * *------------------------------------------------------------------------- */ herr_t @@ -4186,16 +4819,27 @@ H5C_unprotect(H5F_t * f, const H5C_class_t * type, haddr_t addr, void * thing, - unsigned int flags) + unsigned int flags, + size_t new_size) { hbool_t deleted; + hbool_t dirtied; hbool_t set_flush_marker; + hbool_t size_changed; +#ifdef H5_HAVE_PARALLEL + hbool_t clear_entry = FALSE; +#endif /* H5_HAVE_PARALLEL */ herr_t ret_value = SUCCEED; /* Return value */ H5C_cache_entry_t * entry_ptr; H5C_cache_entry_t * test_entry_ptr; FUNC_ENTER_NOAPI(H5C_unprotect, FAIL) + deleted = ( (flags & H5C__DELETED_FLAG) != 0 ); + dirtied = ( (flags & H5C__DIRTIED_FLAG) != 0 ); + set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); + size_changed = ( (flags & H5C__SIZE_CHANGED_FLAG) != 0 ); + HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); HDassert( cache_ptr->skip_file_checks || f ); @@ -4204,15 +4848,52 @@ H5C_unprotect(H5F_t * f, HDassert( type->flush ); HDassert( H5F_addr_defined(addr) ); HDassert( thing ); - - deleted = ( (flags & H5C__DELETED_FLAG) != 0 ); - set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); + HDassert( ( size_changed == TRUE ) || ( size_changed == FALSE ) ); + HDassert( ( ! size_changed ) || ( dirtied ) ); + HDassert( ( ! size_changed ) || ( new_size > 0 ) ); entry_ptr = (H5C_cache_entry_t *)thing; HDassert( entry_ptr->addr == addr ); HDassert( entry_ptr->type == type ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + +#ifdef H5_HAVE_PARALLEL + /* When the H5C code is used to implement the metadata cache in the + * PHDF5 case, only the cache on process 0 is allowed to write to file. + * All the other metadata caches must hold dirty entries until they + * are told that the entries are clean. + * + * The clear_on_unprotect flag in the H5C_cache_entry_t structure + * exists to deal with the case in which an entry is protected when + * its cache receives word that the entry is now clean. In this case, + * the clear_on_unprotect flag is set, and the entry is flushed with + * the H5C__FLUSH_CLEAR_ONLY_FLAG. + * + * All this is a bit awkward, but until the metadata cache entries + * are contiguous, with only one dirty flag, we have to let the supplied + * functions deal with the reseting the is_dirty flag. + */ + if ( entry_ptr->clear_on_unprotect ) { + + HDassert( entry_ptr->is_dirty ); + + entry_ptr->clear_on_unprotect = FALSE; + + if ( ! dirtied ) { + + clear_entry = TRUE; + } + } +#endif /* H5_HAVE_PARALLEL */ + if ( ! (entry_ptr->is_protected) ) { HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ @@ -4220,13 +4901,40 @@ H5C_unprotect(H5F_t * f, } /* mark the entry as dirty if appropriate */ - entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || (flags & H5AC__DIRTIED_FLAG) ); + entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || dirtied ); + + /* update for change in entry size if necessary */ + if ( ( size_changed ) && ( entry_ptr->size != new_size ) ) { + + /* update the protected list */ + H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr->pl_len), \ + (cache_ptr->pl_size), \ + (entry_ptr->size), (new_size)); + + /* update the hash table */ + H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\ + (new_size)); + + /* if the entry is in the skip list, update that too */ + if ( entry_ptr->in_slist ) { + + H5C__UPDATE_SLIST_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\ + (new_size)); + } + + /* update statistics just before changing the entry size */ + H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE((cache_ptr), (entry_ptr), \ + (new_size)); + + /* finally, update the entry size proper */ + entry_ptr->size = new_size; + } H5C__UPDATE_RP_FOR_UNPROTECT(cache_ptr, entry_ptr, FAIL) entry_ptr->is_protected = FALSE; - /* if the entry is dirty, or its flush_marker with the set flush flag, + /* if the entry is dirty, 'or' its flush_marker with the set flush flag, * and then add it to the skip list if it isn't there already. */ @@ -4287,11 +4995,57 @@ H5C_unprotect(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush.") } } +#ifdef H5_HAVE_PARALLEL + else if ( clear_entry ) { + + /* the following first flush flag will never be used as we are + * calling H5C_flush_single_entry with the H5C__FLUSH_CLEAR_ONLY_FLAG + * flag. However, it is needed for the function call. + */ + hbool_t dummy_first_flush = TRUE; + + /* verify that the target entry is in the cache. */ + + H5C__SEARCH_INDEX(cache_ptr, addr, test_entry_ptr, FAIL) + + if ( test_entry_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "entry not in hash table?!?.") + } + else if ( test_entry_ptr != entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "hash table contains multiple entries for addr?!?.") + } + + if ( H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + type, + addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &dummy_first_flush, + TRUE) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear.") + } + } +#endif /* H5_HAVE_PARALLEL */ H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr) done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_unprotect() */ @@ -5656,6 +6410,18 @@ done: * H5C__FLUSH_INVALIDATE_FLAG and H5C__FLUSH_CLEAR_ONLY_FLAG * respectively. * + * JRM -- 6/24/05 + * Added code to remove dirty entries from the slist after + * they have been flushed. Also added a sanity check that + * will scream if we attempt a write when writes are + * completely disabled. + * + * JRM -- 7/5/05 + * Added code to call the new log_flush callback whenever + * a dirty entry is written to disk. Note that the callback + * is not called if the H5C__FLUSH_CLEAR_ONLY_FLAG is set, + * as there is no write to file in this case. + * *------------------------------------------------------------------------- */ static herr_t @@ -5671,8 +6437,10 @@ H5C_flush_single_entry(H5F_t * f, { hbool_t destroy; hbool_t clear_only; + hbool_t was_dirty; herr_t ret_value = SUCCEED; /* Return value */ herr_t status; + int type_id; H5C_cache_entry_t * entry_ptr = NULL; FUNC_ENTER_NOAPI_NOINIT(H5C_flush_single_entry) @@ -5779,6 +6547,9 @@ H5C_flush_single_entry(H5F_t * f, #endif /* NDEBUG */ #endif /* H5_HAVE_PARALLEL */ + was_dirty = entry_ptr->is_dirty; + type_id = entry_ptr->type->id; + entry_ptr->flush_marker = FALSE; if ( clear_only ) { @@ -5928,6 +6699,16 @@ H5C_flush_single_entry(H5F_t * f, } } else { +#if H5C_DO_SANITY_CHECKS + if ( ( entry_ptr->is_dirty ) && + ( cache_ptr->check_write_permitted == NULL ) && + ( ! (cache_ptr->write_permitted) ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Write when writes are always forbidden!?!?!") + } +#endif /* H5C_DO_SANITY_CHECKS */ + /* Only block for all the processes on the first piece of metadata */ @@ -5951,10 +6732,28 @@ H5C_flush_single_entry(H5F_t * f, } } + if ( ( ! destroy ) && ( entry_ptr->in_slist ) ) { + + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) + } + if ( ! destroy ) { HDassert( !(entry_ptr->is_dirty) ); HDassert( !(entry_ptr->flush_marker) ); + HDassert( !(entry_ptr->in_slist) ); + } + + if ( cache_ptr->log_flush ) { + + status = (cache_ptr->log_flush)(cache_ptr, addr, was_dirty, + flags, type_id); + + if ( status < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "log_flush callback failed.") + } } } @@ -6024,6 +6823,10 @@ H5C_load_entry(H5F_t * f, entry_ptr->type = type; entry_ptr->is_protected = FALSE; entry_ptr->in_slist = FALSE; + entry_ptr->flush_marker = FALSE; +#ifdef H5_HAVE_PARALLEL + entry_ptr->clear_on_unprotect = FALSE; +#endif /* H5_HAVE_PARALLEL */ if ( (type->size)(f, thing, &(entry_ptr->size)) < 0 ) { @@ -6271,3 +7074,203 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5C_make_space_in_cache() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_validate_lru_list + * + * Purpose: Debugging function that scans the LRU list for errors. + * + * If an error is detected, the function generates a + * diagnostic and returns FAIL. If no error is detected, + * the function returns SUCCEED. + * + * Return: FAIL if error is detected, SUCCEED otherwise. + * + * Programmer: John Mainzer, 7/14/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + +static herr_t +H5C_validate_lru_list(H5C_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int32_t len = 0; + size_t size = 0; + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI_NOINIT(H5C_validate_lru_list) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + if ( ( ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_tail_ptr == NULL ) + ) + && + ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 1 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 1 failed") + } + + if ( ( cache_ptr->LRU_list_len < 0 ) || ( cache_ptr->LRU_list_size < 0 ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 2 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 2 failed") + } + + if ( ( cache_ptr->LRU_list_len == 1 ) + && + ( ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr ) + || + ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_head_ptr->size != cache_ptr->LRU_list_size ) + ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 3 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 3 failed") + } + + if ( ( cache_ptr->LRU_list_len >= 1 ) + && + ( ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_head_ptr->prev != NULL ) + || + ( cache_ptr->LRU_tail_ptr == NULL ) + || + ( cache_ptr->LRU_tail_ptr->next != NULL ) + ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 4 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 4 failed") + } + + entry_ptr = cache_ptr->LRU_head_ptr; + while ( entry_ptr != NULL ) + { + + if ( ( entry_ptr != cache_ptr->LRU_head_ptr ) && + ( ( entry_ptr->prev == NULL ) || + ( entry_ptr->prev->next != entry_ptr ) ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 5 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 5 failed") + } + + if ( ( entry_ptr != cache_ptr->LRU_tail_ptr ) && + ( ( entry_ptr->next == NULL ) || + ( entry_ptr->next->prev != entry_ptr ) ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 6 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 6 failed") + } + + len++; + size += entry_ptr->size; + entry_ptr = entry_ptr->next; + } + + if ( ( cache_ptr->LRU_list_len != len ) || + ( cache_ptr->LRU_list_size != size ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 7 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 7 failed") + } + +done: + + if ( ret_value != SUCCEED ) { + + HDassert(0); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_validate_lru_list() */ + +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_verify_not_in_index + * + * Purpose: Debugging function that scans the hash table to verify + * that the specified instance of H5C_cache_entry_t is not + * present. + * + * If an error is detected, the function generates a + * diagnostic and returns FAIL. If no error is detected, + * the function returns SUCCEED. + * + * Return: FAIL if error is detected, SUCCEED otherwise. + * + * Programmer: John Mainzer, 7/14/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + +static herr_t +H5C_verify_not_in_index(H5C_t * cache_ptr, + H5C_cache_entry_t * entry_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int32_t i; + int32_t depth; + H5C_cache_entry_t * scan_ptr = NULL; + + FUNC_ENTER_NOAPI_NOINIT(H5C_verify_not_in_index) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( entry_ptr != NULL ); + + for ( i = 0; i < H5C__HASH_TABLE_LEN; i++ ) + { + depth = 0; + scan_ptr = cache_ptr->index[i]; + + while ( scan_ptr != NULL ) + { + if ( scan_ptr == entry_ptr ) { + + HDfprintf(stdout, + "H5C_verify_not_in_index: entry in index (%d/%d)\n", + i, depth); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Entry already in index.") + } + depth++; + scan_ptr = scan_ptr->ht_next; + } + } + +done: + + if ( ret_value != SUCCEED ) { + + HDassert(0); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_verify_not_in_index() */ + +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 54a8f48..007f30f 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -79,9 +79,18 @@ * * JRM - 7/19/04 * + * The TBBT has since been replaced with a skip list. This change + * greatly predates this note. + * + * JRM - 9/26/05 + * * magic: Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC. This * field is used to validate pointers to instances of H5C_t. * + * aux_ptr: Pointer to void used to allow wrapper code to associate + * its data with an instance of H5C_t. The H5C cache code + * sets this field to NULL, and otherwise leaves it alone. + * * max_type_id: Integer field containing the maximum type id number assigned * to a type of entry in the cache. All type ids from 0 to * max_type_id inclusive must be defined. The names of the @@ -110,6 +119,10 @@ * will attempt to reduce its size to the max_cache_size * limit on the next cache write. * + * c) When an entry increases in size, the cache may exceed + * the max_cache_size limit until the next time the cache + * attempts to load or insert an entry. + * * min_clean_size: Nominal minimum number of clean bytes in the cache. * The cache attempts to maintain this number of bytes of * clean data so as to avoid case b) above. Again, this is @@ -126,7 +139,14 @@ * a write is permissible at any given point in time. * * If no such function is specified (i.e. this field is NULL), - * the cache will presume that writes are always permissable. + * the cache uses the following write_permitted field to + * determine whether writes are permitted. + * + * write_permitted: If check_write_permitted is NULL, this boolean flag + * indicates whether writes are permitted. + * + * log_flush: If provided, this function is called whenever a dirty + * entry is flushed to disk. * * * The cache requires an index to facilitate searching for entries. The @@ -483,6 +503,16 @@ * id equal to the array index has been renamed in the current * epoch. * + * size_increases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. + * The cells are used to record the number of times an entry + * with type id equal to the array index has increased in + * size in the current epoch. + * + * size_decreases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. + * The cells are used to record the number of times an entry + * with type id equal to the array index has decreased in + * size in the current epoch. + * * total_ht_insertions: Number of times entries have been inserted into the * hash table in the current epoch. * @@ -580,6 +610,8 @@ struct H5C_t { uint32_t magic; + void * aux_ptr; + int32_t max_type_id; const char * (* type_name_table_ptr); @@ -587,6 +619,9 @@ struct H5C_t size_t min_clean_size; H5C_write_permitted_func_t check_write_permitted; + hbool_t write_permitted; + + H5C_log_flush_func_t log_flush; int32_t index_len; size_t index_size; @@ -646,6 +681,8 @@ struct H5C_t int64_t flushes[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t evictions[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t renames[H5C__MAX_NUM_TYPE_IDS + 1]; + int64_t size_increases[H5C__MAX_NUM_TYPE_IDS + 1]; + int64_t size_decreases[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t total_ht_insertions; int64_t total_ht_deletions; diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index 7c0151b..57d74af 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -36,6 +36,7 @@ #include "H5Fprivate.h" /* File access */ #define H5C_DO_SANITY_CHECKS 0 +#define H5C_DO_EXTREME_SANITY_CHECKS 0 /* This sanity checking constant was picked out of the air. Increase * or decrease it if appropriate. Its purposes is to detect corrupt @@ -149,6 +150,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, hid_t dxpl_id, hbool_t * write_permitted_ptr); +typedef herr_t (*H5C_log_flush_func_t)(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); + /* Upper and lower limits on cache size. These limits are picked * out of a hat -- you should be able to change them as necessary. * @@ -180,8 +187,9 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * * In typical application, this structure is the first field in a * structure to be cached. For historical reasons, the external module - * is responsible for managing the is_dirty field. All other fields are - * managed by the cache. + * is responsible for managing the is_dirty field (this is no longer + * completely true. See the comment on the is_dirty field for details). + * All other fields are managed by the cache. * * The fields of this structure are discussed individually below: * @@ -220,6 +228,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * someday. However it will require a change in the * cache interface. * + * Update: Management of the is_dirty field has been largely + * moved into the cache. The only remaining exceptions + * are the flush and clear functions supplied by the + * modules using the cache. These still clear the + * is_dirty field as before. -- JRM 7/5/05 + * * is_protected: Boolean flag indicating whether this entry is protected * (or locked, to use more conventional terms). When it is * protected, the entry cannot be flushed or accessed until @@ -239,6 +253,18 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * H5AC__FLUSH_MARKED_ENTRIES_FLAG. The flag is reset when * the entry is flushed for whatever reason. * + * clear_on_unprotect: Boolean flag used only in PHDF5. When H5C is used + * to implement the metadata cache In the parallel case, only + * the cache with mpi rank 0 is allowed to actually write to + * file -- all other caches must retain dirty entries until they + * are advised that the entry is clean. + * + * This flag is used in the case that such an advisory is + * received when the entry is protected. If it is set when an + * entry is unprotected, and the dirtied flag is not set in + * the unprotect, the entry's is_dirty flag is reset by flushing + * it with the H5C__FLUSH_CLEAR_ONLY_FLAG. + * * * Fields supporting the hash table: * @@ -344,6 +370,9 @@ typedef struct H5C_cache_entry_t hbool_t is_protected; hbool_t in_slist; hbool_t flush_marker; +#ifdef H5_HAVE_PARALLEL + hbool_t clear_on_unprotect; +#endif /* H5_HAVE_PARALLEL */ /* fields supporting the hash table: */ @@ -676,20 +705,24 @@ typedef struct H5C_auto_size_ctl_t #define H5C__SET_FLUSH_MARKER_FLAG 0x0001 #define H5C__DELETED_FLAG 0x0002 -/* This flag applies only to H5C_unprotect() */ +/* These flags applies only to H5C_unprotect() */ #define H5C__DIRTIED_FLAG 0x0004 +#define H5C__SIZE_CHANGED_FLAG 0x0008 /* These flags apply to H5C_flush() & H5C_flush_single_entry() */ -#define H5C__FLUSH_INVALIDATE_FLAG 0x0008 -#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0010 -#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0020 +#define H5C__FLUSH_INVALIDATE_FLAG 0x0010 +#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0020 +#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0040 H5_DLL H5C_t * H5C_create(size_t max_cache_size, size_t min_clean_size, int max_type_id, - const char * (* type_name_table_ptr), - H5C_write_permitted_func_t check_write_permitted); + const char * (* type_name_table_ptr), + H5C_write_permitted_func_t check_write_permitted, + hbool_t write_permitted, + H5C_log_flush_func_t log_flush, + void * aux_ptr); H5_DLL void H5C_def_auto_resize_rpt_fcn(H5C_t * cache_ptr, int32_t version, @@ -713,6 +746,11 @@ H5_DLL herr_t H5C_flush_cache(H5F_t * f, H5C_t * cache_ptr, unsigned flags); +H5_DLL herr_t H5C_flush_to_min_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr); + H5_DLL herr_t H5C_get_cache_auto_resize_config(H5C_t * cache_ptr, H5C_auto_size_ctl_t *config_ptr); @@ -725,6 +763,13 @@ H5_DLL herr_t H5C_get_cache_size(H5C_t * cache_ptr, H5_DLL herr_t H5C_get_cache_hit_rate(H5C_t * cache_ptr, double * hit_rate_ptr); +H5_DLL herr_t H5C_get_entry_status(H5C_t * cache_ptr, + haddr_t addr, + size_t * size_ptr, + hbool_t * in_cache_ptr, + hbool_t * is_dirty_ptr, + hbool_t * is_protected_ptr); + H5_DLL herr_t H5C_insert_entry(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, @@ -734,6 +779,13 @@ H5_DLL herr_t H5C_insert_entry(H5F_t * f, void * thing, unsigned int flags); +H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int32_t ce_array_len, + haddr_t * ce_array_ptr); + H5_DLL herr_t H5C_rename_entry(H5C_t * cache_ptr, const H5C_class_t * type, haddr_t old_addr, @@ -770,7 +822,8 @@ H5_DLL herr_t H5C_unprotect(H5F_t * f, const H5C_class_t * type, haddr_t addr, void * thing, - unsigned flags); + unsigned int flags, + size_t new_size); H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t * config_ptr, unsigned int tests); diff --git a/src/H5D.c b/src/H5D.c index 1063499..400d41e 100644 --- a/src/H5D.c +++ b/src/H5D.c @@ -2034,7 +2034,7 @@ H5D_update_entry_info(H5F_t *file, hid_t dxpl_id, H5D_t *dset, H5P_genplist_t *p #endif /* H5O_ENABLE_BOGUS */ /* Add a modification time message. */ - if (H5O_touch_oh(file, oh, TRUE, &oh_flags) < 0) + if (H5O_touch_oh(file, dxpl_id, oh, TRUE, &oh_flags) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to update modification time message") done: diff --git a/src/H5F.c b/src/H5F.c index f9c1784..cd75a2f 100644 --- a/src/H5F.c +++ b/src/H5F.c @@ -67,7 +67,8 @@ static int H5F_flush_all_cb(void *f, hid_t fid, void *_invalidate); static unsigned H5F_get_objects(const H5F_t *f, unsigned types, int max_objs, hid_t *obj_id_list); static int H5F_get_objects_cb(void *obj_ptr, hid_t obj_id, void *key); static herr_t H5F_get_vfd_handle(const H5F_t *file, hid_t fapl, void** file_handle); -static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id); +static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id, + H5FD_t *lf); static herr_t H5F_dest(H5F_t *f, hid_t dxpl_id); static herr_t H5F_flush(H5F_t *f, hid_t dxpl_id, H5F_scope_t scope, unsigned flags); static herr_t H5F_close(H5F_t *f); @@ -1426,10 +1427,16 @@ done: * Updated for the new metadata cache, and associated * property list changes. * + * J Mainzer, Jun 30, 2005 + * Added lf parameter so the shared->lf field can be + * initialized prior to the call to H5AC_create() if a + * new instance of H5F_file_t is created. lf should be + * NULL if shared isn't, and vise versa. + * *------------------------------------------------------------------------- */ static H5F_t * -H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id) +H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf) { H5F_t *f=NULL, *ret_value; H5P_genplist_t *plist; /* Property list */ @@ -1441,14 +1448,17 @@ H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id) f->file_id = -1; if (shared) { + HDassert( lf == NULL ); f->shared = shared; } else { + HDassert( lf != NULL ); f->shared = H5FL_CALLOC(H5F_file_t); f->shared->super_addr = HADDR_UNDEF; f->shared->base_addr = HADDR_UNDEF; f->shared->freespace_addr = HADDR_UNDEF; f->shared->driver_addr = HADDR_UNDEF; - + f->shared->lf = lf; + /* * Copy the file creation and file access property lists into the * new file handle. We do this early because some values might need @@ -1803,10 +1813,10 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "file is already open for read-only") /* Allocate new "high-level" file struct */ - if ((file = H5F_new(shared, fcpl_id, fapl_id)) == NULL) + if ((file = H5F_new(shared, fcpl_id, fapl_id, NULL)) == NULL) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") - lf = file->shared->lf; + lf = file->shared->lf; } else if (flags!=tent_flags) { /* * This file is not yet open by the library and the flags we used to @@ -1821,20 +1831,18 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d file = NULL; /*to prevent destruction of wrong file*/ HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file") } - if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id))) + if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf))) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") file->shared->flags = flags; - file->shared->lf = lf; } else { /* * This file is not yet open by the library and our tentative opening * above is good enough. */ - if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id))) + if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf))) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") file->shared->flags = flags; - file->shared->lf = lf; } /* Short cuts */ @@ -2841,7 +2849,7 @@ H5Freopen(hid_t file_id) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file") /* Get a new "top level" file struct, sharing the same "low level" file struct */ - if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT))) + if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT, NULL))) HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "unable to reopen file") /* Keep old file's read/write intent in new file */ @@ -3389,6 +3397,40 @@ H5F_mpi_get_comm(const H5F_t *f) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_mpi_get_comm() */ + + +/*------------------------------------------------------------------------- + * Function: H5F_mpi_get_size + * + * Purpose: Retrieves the size of an MPI process. + * + * Return: Success: The size (positive) + * + * Failure: Negative + * + * Programmer: John Mainzer + * Friday, May 6, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +H5F_mpi_get_size(const H5F_t *f) +{ + int ret_value; + + FUNC_ENTER_NOAPI(H5F_mpi_get_size, FAIL) + + assert(f && f->shared); + + /* Dispatch to driver */ + if ((ret_value=H5FD_mpi_get_size(f->shared->lf))<0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_size request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_mpi_get_size() */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5FD.c b/src/H5FD.c index 8d152ec..b40b7b0 100644 --- a/src/H5FD.c +++ b/src/H5FD.c @@ -2328,7 +2328,7 @@ H5FD_free(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si H5_ASSIGN_OVERFLOW(overlap_size,(file->accum_loc+file->accum_size)-addr,haddr_t,size_t); /* Block to free is in the middle of the accumulator */ - if(H5F_addr_lt(addr,file->accum_loc+file->accum_size)) { + if(H5F_addr_lt((addr + size), file->accum_loc + file->accum_size)) { haddr_t tail_addr; size_t tail_size; diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index f285f1a..3cf1968 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -934,6 +934,13 @@ done: * * Modifications: * + * John Mainzer -- 9/21/05 + * Modified code to turn off the + * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag. + * With the movement of + * all cache writes to process 0, this flag has become + * problematic in PHDF5. + * *------------------------------------------------------------------------- */ static herr_t @@ -947,15 +954,6 @@ H5FD_mpio_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */) if(flags) { *flags=0; *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ - - /* Distinguish between updating the metadata accumulator on writes and - * reads. This is particularly (perhaps only, even) important for MPI-I/O - * where we guarantee that writes are collective, but reads may not be. - * If we were to allow the metadata accumulator to be written during a - * read operation, the application would hang. - */ - *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */ - *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ } /* end if */ @@ -1553,9 +1551,18 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property") +#if 0 /* JRM */ + /* The metadata cache now only writes from process 0, which makes + * this synchronization incorrect. I'm leaving this code commented + * out instead of deleting it to remind us that we should re-write + * this function so that a metadata write from any other process + * should flag an error. + * -- JRM 9/1/05 + */ if(block_before_meta_write) if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) +#endif /* JRM */ /* Only one process will do the actual write if all procs in comm write same metadata */ if (file->mpi_rank != H5_PAR_META_WRITE) { @@ -1616,11 +1623,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, file->eof = HADDR_UNDEF; done: - /* if only one process writes, need to broadcast the ret_value to other processes */ + +#if 0 /* JRM */ + /* Since metadata writes are now done by process 0 only, this broadcast + * is no longer needed. I leave it in and commented out to remind us + * that we need to re-work this function to reflect this reallity. + * + * -- JRM 9/1/05 + */ + /* if only one process writes, need to broadcast the ret_value to + * other processes + */ if (type!=H5FD_MEM_DRAW) { if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ +#endif /* JRM */ #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 11e7849..de491f0 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -910,6 +910,12 @@ done: * * Modifications: * + * John Mainzer -- 9/21/05 + * Modified code to turn off the + * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag. + * With the movement of all cache writes to process 0, + * this flag has become problematic in PHDF5. + * *------------------------------------------------------------------------- */ static herr_t @@ -923,15 +929,6 @@ H5FD_mpiposix_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */) if(flags) { *flags=0; *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ - - /* Distinguish between updating the metadata accumulator on writes and - * reads. This is particularly (perhaps only, even) important for MPI-I/O - * where we guarantee that writes are collective, but reads may not be. - * If we were to allow the metadata accumulator to be written during a - * read operation, the application would hang. - */ - *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */ - *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ } /* end if */ @@ -1235,6 +1232,14 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") /* Metadata specific actions */ + /* All metadata is now written from process 0 -- thus this function + * needs to be re-written to reflect this. For now I have simply + * commented out the code that attempts to synchronize metadata + * writes between processes, but we should really just flag an error + * whenever any process other than process 0 attempts to write + * metadata. + * -- JRM 9/1/05 + */ if(type!=H5FD_MEM_DRAW) { unsigned block_before_meta_write=0; /* Whether to block before a metadata write */ @@ -1252,9 +1257,11 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property") +#if 0 /* JRM */ if(block_before_meta_write) if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) +#endif /* JRM */ /* Only one process will do the actual write if all procs in comm write same metadata */ if (file->mpi_rank != H5_PAR_META_WRITE) @@ -1328,6 +1335,14 @@ done: file->pos = HADDR_UNDEF; file->op = OP_UNKNOWN; } /* end if */ +#if 0 /* JRM */ + /* Since metadata writes are now done by process 0 only, this broadcast + * is no longer needed. I leave it in and commented out to remind us + * that we need to re-work this function to reflect this reallity. + * + * -- JRM 9/1/05 + */ + /* Guard against getting into metadata broadcast in failure cases */ else { /* when only one process writes, need to broadcast the ret_value to other processes */ @@ -1336,6 +1351,7 @@ done: HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ } /* end else */ +#endif /* JRM */ FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_mpiposix_write() */ diff --git a/src/H5FDmulti.c b/src/H5FDmulti.c index 71b5c91..a1a065e 100644 --- a/src/H5FDmulti.c +++ b/src/H5FDmulti.c @@ -1628,7 +1628,14 @@ H5FD_multi_alloc(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size) if (HADDR_UNDEF==(addr=H5FDalloc(file->memb[mmt], type, dxpl_id, size))) H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, "member file can't alloc", HADDR_UNDEF) addr += file->fa.memb_addr[mmt]; - if (addr+size>file->eoa) file->eoa = addr+size; + if ( addr + size > file->eoa ) { + + if ( H5FD_multi_set_eoa(_file, addr + size) < 0 ) { + + H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, \ + "can't set eoa", HADDR_UNDEF) + } + } return addr; } diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index b9d9d74..53d3f05 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -448,6 +448,7 @@ H5_DLL haddr_t H5F_get_eoa(const H5F_t *f); #ifdef H5_HAVE_PARALLEL H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); +H5_DLL int H5F_mpi_get_size(const H5F_t *f); #endif /* H5_HAVE_PARALLEL */ /* Functions than check file mounting information */ diff --git a/src/H5HL.c b/src/H5HL.c index ee31ecc..9de9f5b 100644 --- a/src/H5HL.c +++ b/src/H5HL.c @@ -155,7 +155,6 @@ H5HL_create(H5F_t *f, hid_t dxpl_id, size_t size_hint, haddr_t *addr_p/*out*/) heap->addr = *addr_p + (hsize_t)sizeof_hdr; heap->disk_alloc = size_hint; heap->mem_alloc = size_hint; - heap->disk_resrv = 0; if (NULL==(heap->chunk = H5FL_BLK_CALLOC(heap_chunk,(sizeof_hdr + size_hint)))) HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed"); @@ -320,6 +319,20 @@ done: * * Modifications: * + * John Mainzer, 8/10/05 + * Reworked this function for a different role. + * + * It used to be called during cache eviction, where it + * attempted to size the disk space allocation for the + * actuall size of the heap. However, this causes problems + * in the parallel case, as the reuslting disk allocations + * may not be synchronized. + * + * It is now called from H5HL_remove(), where it is used to + * reduce heap size in response to an entry deletion. This + * means that the function should either do nothing, or + * reduce the size of the disk allocation. + * *------------------------------------------------------------------------- */ static herr_t @@ -331,19 +344,13 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) FUNC_ENTER_NOAPI(H5HL_minimize_heap_space, FAIL) /* check args */ - assert(f); - assert(heap); + HDassert( f ); + HDassert( heap ); + HDassert( heap->disk_alloc == heap->mem_alloc ); sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */ /* - * When the heap is being flushed to disk, release the file space reserved - * for it. - */ - H5MF_free_reserved(f, (hsize_t)heap->disk_resrv); - heap->disk_resrv = 0; - - /* * Check to see if we can reduce the size of the heap in memory by * eliminating free blocks at the tail of the buffer before flushing the * buffer out. @@ -427,13 +434,15 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) } /* - * If the heap grew larger or smaller than disk storage then move the + * If the heap grew smaller than disk storage then move the * data segment of the heap to another contiguous block of disk * storage. */ if (heap->mem_alloc != heap->disk_alloc) { haddr_t old_addr = heap->addr, new_addr; + HDassert( heap->mem_alloc < heap->disk_alloc ); + /* Release old space on disk */ H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t); H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr, (hsize_t)heap->disk_alloc); @@ -453,7 +462,8 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) done: FUNC_LEAVE_NOAPI(ret_value) -} + +} /* H5HL_minimize_heap_space() */ /*------------------------------------------------------------------------- @@ -543,6 +553,13 @@ H5HL_serialize(H5F_t *f, H5HL_t *heap, uint8_t *buf) * * Bill Wendling, 2003-09-16 * Separated out the bit that serializes the heap. + * + * John Mainzer, 2005-08-10 + * Removed call to H5HL_minimize_heap_space(). It does disk space + * allocation, which can cause problems if done at flush time. + * Instead, disk space allocation/deallocation is now done at + * insert/remove time. + * *------------------------------------------------------------------------- */ static herr_t @@ -553,21 +570,18 @@ H5HL_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5HL_t *heap) FUNC_ENTER_NOAPI(H5HL_flush, FAIL); /* check arguments */ - assert(f); - assert(H5F_addr_defined(addr)); - assert(heap); + HDassert( f ); + HDassert( H5F_addr_defined(addr) ); + HDassert( heap ); + HDassert( heap->disk_alloc == heap->mem_alloc ); if (heap->cache_info.is_dirty) { haddr_t hdr_end_addr; size_t sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */ - /* Minimize the heap space size if possible */ - if (H5HL_minimize_heap_space(f, dxpl_id, heap) < 0) - HGOTO_ERROR(H5E_HEAP, H5E_CANTFREE, FAIL, "unable to minimize local heap space") - /* Write the header */ if (H5HL_serialize(f, heap, heap->chunk) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap") + HGOTO_ERROR(H5E_HEAP, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap") /* Copy buffer to disk */ hdr_end_addr = addr + (hsize_t)sizeof_hdr; @@ -951,6 +965,10 @@ H5HL_remove_free(H5HL_t *heap, H5HL_free_t *fl) * H5AC_unprotect() instead of manipulating the is_dirty * field of the cache info directly. * + * John Mainzer, 8/10/05 + * Modified code to allocate file space as needed, instead + * of allocating it on eviction. + * *------------------------------------------------------------------------- */ size_t @@ -959,10 +977,12 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void * H5HL_t *heap = NULL; unsigned heap_flags = H5AC__NO_FLAGS_SET; H5HL_free_t *fl = NULL, *max_fl = NULL; + htri_t tri_result; + herr_t result; size_t offset = 0; size_t need_size, old_size, need_more; + size_t new_disk_alloc; hbool_t found; - size_t disk_resrv; /* Amount of additional space to reserve in file */ size_t sizeof_hdr; /* Cache H5HL header size for file */ size_t ret_value; /* Return value */ @@ -1028,18 +1048,54 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void * if (found==FALSE) { need_more = MAX3(need_size, heap->mem_alloc, H5HL_SIZEOF_FREE(f)); - /* Reserve space in the file to hold the increased heap size - */ - if( heap->disk_resrv == heap->mem_alloc) - disk_resrv = need_more; - else - disk_resrv = heap->mem_alloc + need_more - heap->disk_resrv; + new_disk_alloc = heap->disk_alloc + need_more; + HDassert( heap->disk_alloc < new_disk_alloc ); + H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t); + H5_CHECK_OVERFLOW(new_disk_alloc, size_t, hsize_t); + + /* extend the current heap if we can... */ + tri_result = H5MF_can_extend(f, H5FD_MEM_LHEAP, heap->addr, + (hsize_t)(heap->disk_alloc), + (hsize_t)need_more); + if ( tri_result == TRUE ) { + + result = H5MF_extend(f, H5FD_MEM_LHEAP, heap->addr, + (hsize_t)(heap->disk_alloc), + (hsize_t)need_more); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \ + "can't extend heap on disk"); + } + + heap->disk_alloc = new_disk_alloc; + + } else { /* ...if we can't, allocate a new chunk & release the old */ + + haddr_t old_addr = heap->addr; + haddr_t new_addr; + + /* The new allocation may fail -- to avoid the possiblity of + * file corruption, allocate the new heap first, and then + * deallocate the old. + */ + + /* allocate new disk space for the heap */ + if ( (new_addr = H5MF_alloc(f, H5FD_MEM_LHEAP, dxpl_id, + (hsize_t)new_disk_alloc)) == HADDR_UNDEF ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \ + "unable to allocate file space for heap") + } - if( H5MF_reserve(f, (hsize_t)disk_resrv) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), "unable to reserve space in file"); + /* Release old space on disk */ + H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr, + (hsize_t)heap->disk_alloc); + H5E_clear_stack(NULL); /* don't really care if the free failed */ - /* Update heap's record of how much space it has reserved */ - heap->disk_resrv += disk_resrv; + heap->addr = new_addr; + heap->disk_alloc = new_disk_alloc; + } if (max_fl && max_fl->offset + max_fl->size == heap->mem_alloc) { /* @@ -1117,7 +1173,8 @@ done: HDONE_ERROR(H5E_HEAP, H5E_PROTECT, (size_t)(-1), "unable to release object header"); FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5HL_insert() */ #ifdef NOT_YET @@ -1217,6 +1274,11 @@ done: * H5AC_unprotect() instead of manipulating the is_dirty * field of the cache info directly. * + * John Mainzer, 8/10/05 + * Modified code to attempt to decrease heap size if the + * entry removal results in a free list entry at the end + * of the heap that is at least half the size of the heap. + * *------------------------------------------------------------------------- */ herr_t @@ -1225,15 +1287,15 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) H5HL_t *heap = NULL; unsigned heap_flags = H5AC__NO_FLAGS_SET; H5HL_free_t *fl = NULL, *fl2 = NULL; - herr_t ret_value=SUCCEED; /* Return value */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5HL_remove, FAIL); /* check arguments */ - assert(f); - assert(H5F_addr_defined(addr)); - assert(size > 0); - assert (offset==H5HL_ALIGN (offset)); + HDassert( f ); + HDassert( H5F_addr_defined(addr) ); + HDassert( size > 0 ); + HDassert( offset == H5HL_ALIGN(offset) ); if (0==(f->intent & H5F_ACC_RDWR)) HGOTO_ERROR (H5E_HEAP, H5E_WRITEERROR, FAIL, "no write intent on file"); @@ -1243,8 +1305,9 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) if (NULL == (heap = H5AC_protect(f, dxpl_id, H5AC_LHEAP, addr, NULL, NULL, H5AC_WRITE))) HGOTO_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to load heap"); - assert(offset < heap->mem_alloc); - assert(offset + size <= heap->mem_alloc); + HDassert( offset < heap->mem_alloc ); + HDassert( offset + size <= heap->mem_alloc ); + HDassert( heap->disk_alloc == heap->mem_alloc ); fl = heap->freelist; heap_flags |= H5AC__DIRTIED_FLAG; @@ -1268,10 +1331,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) assert (fl->offset==H5HL_ALIGN (fl->offset)); assert (fl->size==H5HL_ALIGN (fl->size)); fl2 = H5HL_remove_free(heap, fl2); + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != + SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl2 = fl2->next; } + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } else if (fl->offset + fl->size == offset) { @@ -1283,10 +1365,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) fl->size += fl2->size; assert (fl->size==H5HL_ALIGN (fl->size)); fl2 = H5HL_remove_free(heap, fl2); + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != + SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl2 = fl2->next; } + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl = fl->next; @@ -1321,6 +1422,16 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) heap->freelist->prev = fl; heap->freelist = fl; + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } + done: if (heap && H5AC_unprotect(f, dxpl_id, H5AC_LHEAP, addr, heap, heap_flags) != SUCCEED) HDONE_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to release object header"); diff --git a/src/H5HLpkg.h b/src/H5HLpkg.h index 1a61866..8b099cc 100644 --- a/src/H5HLpkg.h +++ b/src/H5HLpkg.h @@ -67,7 +67,6 @@ struct H5HL_t { haddr_t addr; /*address of data */ size_t disk_alloc; /*data bytes allocated on disk */ size_t mem_alloc; /*data bytes allocated in mem */ - size_t disk_resrv; /*data bytes "reserved" on disk */ uint8_t *chunk; /*the chunk, including header */ H5HL_free_t *freelist; /*the free list */ }; diff --git a/src/H5MF.c b/src/H5MF.c index f4ce055..6f68027 100644 --- a/src/H5MF.c +++ b/src/H5MF.c @@ -399,9 +399,9 @@ done: * * Purpose: Extend a block in the file. * - * Return: Success: TRUE(1)/FALSE(0) + * Return: Success: Non-negative * - * Failure: FAIL + * Failure: Negative * * Programmer: Quincey Koziol * Saturday, June 12, 2004 @@ -410,10 +410,10 @@ done: * *------------------------------------------------------------------------- */ -htri_t +herr_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested) { - htri_t ret_value; /* Return value */ + herr_t ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5MF_extend, FAIL); diff --git a/src/H5MFprivate.h b/src/H5MFprivate.h index ee35c2a..01b8204 100644 --- a/src/H5MFprivate.h +++ b/src/H5MFprivate.h @@ -53,7 +53,7 @@ H5_DLL herr_t H5MF_free_reserved(H5F_t *f, hsize_t size); H5_DLL hbool_t H5MF_alloc_overflow(H5F_t *f, hsize_t size); H5_DLL htri_t H5MF_can_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested); -H5_DLL htri_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, +H5_DLL herr_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested); #endif diff --git a/src/H5O.c b/src/H5O.c index c1d8d46..81da26f 100644 --- a/src/H5O.c +++ b/src/H5O.c @@ -89,10 +89,12 @@ static int H5O_append_real(H5F_t *f, hid_t dxpl_id, H5O_t *oh, unsigned * oh_flags_ptr); static herr_t H5O_remove_real(H5G_entry_t *ent, const H5O_class_t *type, int sequence, H5O_operator_t op, void *op_data, hbool_t adj_link, hid_t dxpl_id); -static unsigned H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, - size_t size, unsigned * oh_flags_ptr); -static unsigned H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size); -static unsigned H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size); +static unsigned H5O_alloc(H5F_t *f, hid_t dxpl_id, H5O_t *oh, + const H5O_class_t *type, size_t size, hbool_t * oh_dirtied_ptr); +static htri_t H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, + unsigned chunkno, size_t size, unsigned * msg_idx); +static unsigned H5O_alloc_new_chunk(H5F_t *f, hid_t dxpl_id, H5O_t *oh, + size_t size); static herr_t H5O_delete_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh); static herr_t H5O_delete_mesg(H5F_t *f, hid_t dxpl_id, H5O_mesg_t *mesg, hbool_t adj_link); @@ -582,8 +584,12 @@ H5O_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void UNUSED * _udata1, p += 3; /*reserved*/ /* Try to detect invalidly formatted object header messages */ - if (p + mesg_size > oh->chunk[chunkno].image + chunk_size) - HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, "corrupt object header"); + if (p + mesg_size > oh->chunk[chunkno].image + chunk_size) { + + HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, \ + "corrupt object header"); + } + /* Skip header messages we don't know about */ /* (Usually from future versions of the library */ @@ -740,17 +746,12 @@ H5O_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5O_t *oh) /* allocate file space for chunks that have none yet */ if (H5O_CONT_ID == curr_msg->type->id && !H5F_addr_defined(((H5O_cont_t *)(curr_msg->native))->addr)) { - cont = (H5O_cont_t *) (curr_msg->native); - assert(cont->chunkno < oh->nchunks); - assert(!H5F_addr_defined(oh->chunk[cont->chunkno].addr)); - cont->size = oh->chunk[cont->chunkno].size; - - /* Free the space we'd reserved in the file to hold this chunk */ - H5MF_free_reserved(f, (hsize_t)cont->size); - - if (HADDR_UNDEF==(cont->addr=H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)cont->size))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate space for object header data"); - oh->chunk[cont->chunkno].addr = cont->addr; + /* We now allocate disk space on insertion, instead + * of on flush from the cache, so this case is now an + * error. -- JRM + */ + HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, FAIL, + "File space for message not allocated!?!"); } /* @@ -1972,7 +1973,7 @@ H5O_modify_real(H5G_entry_t *ent, const H5O_class_t *type, int overwrite, /* Update the modification time message if any */ if(update_flags&H5O_UPDATE_TIME) - H5O_touch_oh(ent->file, oh, FALSE, &oh_flags); + H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags); /* Set return value */ ret_value = sequence; @@ -2258,7 +2259,8 @@ H5O_new_mesg(H5F_t *f, H5O_t *oh, unsigned *flags, const H5O_class_t *orig_type, HGOTO_ERROR (H5E_OHDR, H5E_CANTINIT, UFAIL, "object header message is too large (16k max)"); /* Allocate space in the object headed for the message */ - if ((ret_value = H5O_alloc(f, oh, orig_type, size, oh_flags_ptr)) == UFAIL) + if ((ret_value = H5O_alloc(f, dxpl_id, oh, + orig_type, size, oh_flags_ptr)) == UFAIL) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, UFAIL, "unable to allocate space for message"); /* Increment any links in message */ @@ -2346,10 +2348,18 @@ done: * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/20/05 + * Added dxpl_id parameter needed by the revised version of + * H5O_alloc(). + * *------------------------------------------------------------------------- */ herr_t -H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr) +H5O_touch_oh(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + hbool_t force, + unsigned * oh_flags_ptr) { unsigned idx; #ifdef H5_HAVE_GETTIMEOFDAY @@ -2382,7 +2392,9 @@ H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr) if (!force) HGOTO_DONE(SUCCEED); /*nothing to do*/ size = (H5O_MTIME_NEW->raw_size)(f, &now); - if ((idx=H5O_alloc(f, oh, H5O_MTIME_NEW, size, oh_flags_ptr))==UFAIL) + + if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_MTIME_NEW, + size, oh_flags_ptr))==UFAIL) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for modification time message"); } @@ -2442,7 +2454,7 @@ H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id) HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header"); /* Create/Update the modification time message */ - if (H5O_touch_oh(ent->file, oh, force, &oh_flags)<0) + if (H5O_touch_oh(ent->file, dxpl_id, oh, force, &oh_flags)<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object modificaton time"); done: @@ -2473,10 +2485,13 @@ done: * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/20/05 + * Added dxpl_id parameter needed by call to H5O_alloc(). + * *------------------------------------------------------------------------- */ herr_t -H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) +H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh, hbool_t * oh_flags_ptr) { int idx; size_t size; @@ -2496,7 +2511,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) /* Create a new message */ if (idx==oh->nmesgs) { size = (H5O_BOGUS->raw_size)(f, NULL); - if ((idx=H5O_alloc(f, oh, H5O_BOGUS, size, oh_flags_ptr))<0) + if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_BOGUS, size, oh_flags_ptr))<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for 'bogus' message"); /* Allocate the native message in memory */ @@ -2507,6 +2522,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) ((H5O_bogus_t *)(oh->mesg[idx].native))->u = H5O_BOGUS_VALUE; /* Mark the message and object header as dirty */ + *oh_flags_ptr = TRUE; oh->mesg[idx].dirty = TRUE; oh->dirty = TRUE; } /* end if */ @@ -2558,7 +2574,7 @@ H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id) HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header"); /* Create the "bogus" message */ - if (H5O_bogus_oh(ent->file, oh, &oh_flags)<0) + if (H5O_bogus_oh(ent->file, dxpl_id, oh, &oh_flags)<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object 'bogus' message"); done: @@ -2816,71 +2832,115 @@ done: } /* end H5O_remove_real() */ -/*------------------------------------------------------------------------- - * Function: H5O_alloc_extend_chunk +/*------------------------------------------------------------------------- * - * Purpose: Extends a chunk which hasn't been allocated on disk yet - * to make the chunk large enough to contain a message whose - * data size is exactly SIZE bytes (SIZE need not be aligned). + * Function: H5O_alloc_extend_chunk * - * If the last message of the chunk is the null message, then - * that message will be extended with the chunk. Otherwise a - * new null message is created. + * Purpose: Attempt to extend a chunk that is allocated on disk. * - * F is the file in which the chunk will be written. It is - * included to ensure that there is enough space to extend - * this chunk. + * If the extension is successful, and if the last message + * of the chunk is the null message, then that message will + * be extended with the chunk. Otherwise a new null message + * is created. * - * Return: Success: Message index for null message which - * is large enough to hold SIZE bytes. + * f is the file in which the chunk will be written. It is + * included to ensure that there is enough space to extend + * this chunk. * - * Failure: Negative + * Return: TRUE: The chunk has been extended, and *msg_idx + * contains the message index for null message + * which is large enough to hold size bytes. * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 7 1997 + * FALSE: The chunk cannot be extended, and *msg_idx + * is undefined. + * + * FAIL: Some internal error has been detected. + * + * Programmer: John Mainzer -- 8/16/05 * * Modifications: - * Robb Matzke, 1999-08-26 - * If new memory is allocated as a multiple of some alignment - * then we're careful to initialize the part of the new memory - * from the end of the expected message to the end of the new - * memory. + * *------------------------------------------------------------------------- */ -static unsigned -H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) +static htri_t +H5O_alloc_extend_chunk(H5F_t *f, + H5O_t *oh, + unsigned chunkno, + size_t size, + unsigned * msg_idx) { - unsigned u; - unsigned idx; - size_t delta, old_size; - size_t aligned_size = H5O_ALIGN(size); - uint8_t *old_addr; - unsigned ret_value; + unsigned u; + unsigned idx; + unsigned i; + size_t delta, old_size; + size_t aligned_size = H5O_ALIGN(size); + uint8_t *old_addr; + herr_t result; + htri_t tri_result; + htri_t ret_value; /* return value */ + hbool_t cont_updated; FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_extend_chunk); /* check args */ - assert(oh); - assert(chunkno < oh->nchunks); - assert(size > 0); + HDassert( f != NULL ); + HDassert( oh != NULL ); + HDassert( chunkno < oh->nchunks ); + HDassert( size > 0 ); + HDassert( msg_idx != NULL ); - if (H5F_addr_defined(oh->chunk[chunkno].addr)) - HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "chunk is on disk"); + if ( !H5F_addr_defined(oh->chunk[chunkno].addr) ) { + + HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, FAIL, "chunk isn't on disk"); + } + + /* Test to see if the specified chunk ends with a null messages. If + * it does, try to extend the chunk and (thereby) the null message. + * If successful, return the index of the the null message in *msg_idx. + */ + for ( idx = 0; idx < oh->nmesgs; idx++ ) + { + if (oh->mesg[idx].chunkno==chunkno) { - /* try to extend a null message */ - for (idx=0; idxnmesgs; idx++) { - if (oh->mesg[idx].chunkno==chunkno) { if (H5O_NULL_ID == oh->mesg[idx].type->id && (oh->mesg[idx].raw + oh->mesg[idx].raw_size == oh->chunk[chunkno].image + oh->chunk[chunkno].size)) { - delta = MAX (H5O_MIN_SIZE, aligned_size - oh->mesg[idx].raw_size); - assert (delta=H5O_ALIGN (delta)); + delta = MAX(H5O_MIN_SIZE, + aligned_size - oh->mesg[idx].raw_size); + + HDassert( delta == H5O_ALIGN(delta) ); + + /* determine whether the chunk can be extended */ + tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( tri_result == FALSE ) { /* can't extend -- we are done */ + + HGOTO_DONE(FALSE); + + } else if ( tri_result != TRUE ) { /* system error */ - /* Reserve space in the file to hold the increased chunk size */ - if( H5MF_reserve(f, (hsize_t)delta) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file"); + HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_can_extend() failed"); + } + + /* if we get this far, we should be able to extend the chunk */ + result = H5MF_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( result < 0 ) { /* system error */ + + HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_extend() failed."); + } + + + /* chunk size has been increased -- tidy up */ oh->mesg[idx].dirty = TRUE; oh->mesg[idx].raw_size += delta; @@ -2888,12 +2948,19 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) old_addr = oh->chunk[chunkno].image; /* Be careful not to indroduce garbage */ - oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr, - (oh->chunk[chunkno].size + delta)); - if (NULL==oh->chunk[chunkno].image) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + oh->chunk[chunkno].image = + H5FL_BLK_REALLOC(chunk_image,old_addr, + (oh->chunk[chunkno].size + delta)); + + if ( NULL == oh->chunk[chunkno].image ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + HDmemset(oh->chunk[chunkno].image + oh->chunk[chunkno].size, 0, delta); + oh->chunk[chunkno].size += delta; /* adjust raw addresses for messages of this chunk */ @@ -2904,35 +2971,106 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) (oh->mesg[u].raw - old_addr); } } - HGOTO_DONE(idx); + + /* adjust the continuation message pointing to this chunk + * for the increase in chunk size. + * + * As best I understand the code, it is not necessarily an + * error if there is no continuation message pointing to a + * chunk -- for example, chunk 0 seems to be pointed to by + * the object header. + */ + cont_updated = FALSE; + for ( i = 0; i < oh->nmesgs; i++ ) + { + if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) && + ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno + == chunkno ) ) { + + HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size + == oh->chunk[chunkno].size - delta ); + + ((H5O_cont_t *)(oh->mesg[i].native))->size = + oh->chunk[chunkno].size; + + cont_updated = TRUE; + + /* there should be at most one continuation message + * pointing to this chunk, so we can quit when we find + * and update it. + */ + break; + } + } + HDassert( ( chunkno == 0 ) || ( cont_updated ) ); + + *msg_idx = idx; + HGOTO_DONE(TRUE); } } /* end if */ - } + } /* end for */ - /* Reserve space in the file */ + /* if we get this far, the specified chunk does not end in a null message. + * Attempt to extend the chunk, and if successful, fill the new section + * of the chunk with a null messages. + */ + + /* compute space needed in the file */ delta = MAX(H5O_MIN_SIZE, aligned_size+H5O_SIZEOF_MSGHDR(f)); delta = H5O_ALIGN(delta); - if( H5MF_reserve(f, (hsize_t)delta) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file"); + /* determine whether the chunk can be extended */ + tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( tri_result == FALSE ) { /* can't extend -- we are done */ + + HGOTO_DONE(FALSE); + + } else if ( tri_result != TRUE ) { /* system error */ + + HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, "H5MF_can_extend() failed"); + } + + /* if we get this far, we should be able to extend the chunk */ + result = H5MF_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( result < 0 ) { /* system error */ + + HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_extend() failed"); + } + /* create a new null message */ - if (oh->nmesgs >= oh->alloc_nmesgs) { + if ( oh->nmesgs >= oh->alloc_nmesgs ) { + unsigned na = oh->alloc_nmesgs + H5O_NMESGS; + H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); - if (NULL==x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( NULL == x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed"); + } oh->alloc_nmesgs = na; oh->mesg = x; } + idx = oh->nmesgs++; + oh->mesg[idx].type = H5O_NULL; oh->mesg[idx].dirty = TRUE; oh->mesg[idx].native = NULL; oh->mesg[idx].raw = oh->chunk[chunkno].image + - oh->chunk[chunkno].size + - H5O_SIZEOF_MSGHDR(f); + oh->chunk[chunkno].size + + H5O_SIZEOF_MSGHDR(f); oh->mesg[idx].raw_size = delta - H5O_SIZEOF_MSGHDR(f); oh->mesg[idx].chunkno = chunkno; @@ -2940,74 +3078,125 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) old_size = oh->chunk[chunkno].size; oh->chunk[chunkno].size += delta; oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr, - oh->chunk[chunkno].size); - if (NULL==oh->chunk[chunkno].image) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + oh->chunk[chunkno].size); + if (NULL==oh->chunk[chunkno].image) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed"); + } + HDmemset(oh->chunk[chunkno].image+old_size, 0, oh->chunk[chunkno].size - old_size); /* adjust raw addresses for messages of this chunk */ if (old_addr != oh->chunk[chunkno].image) { - for (u = 0; u < oh->nmesgs; u++) { - if (oh->mesg[u].chunkno == chunkno) - oh->mesg[u].raw = oh->chunk[chunkno].image + - (oh->mesg[u].raw - old_addr); - } + for (u = 0; u < oh->nmesgs; u++) { + if (oh->mesg[u].chunkno == chunkno) + oh->mesg[u].raw = oh->chunk[chunkno].image + + (oh->mesg[u].raw - old_addr); + } + } + + /* adjust the continuation message pointing to this chunk for the + * increase in chunk size. + * + * As best I understand the code, it is not necessarily an error + * if there is no continuation message pointing to a chunk -- for + * example, chunk 0 seems to be pointed to by the object header. + */ + cont_updated = FALSE; + for ( i = 0; i < oh->nmesgs; i++ ) + { + if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) && + ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno == chunkno ) ) { + + HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size == + oh->chunk[chunkno].size - delta ); + + ((H5O_cont_t *)(oh->mesg[i].native))->size = + oh->chunk[chunkno].size; + + cont_updated = TRUE; + + /* there should be at most one continuation message + * pointing to this chunk, so we can quit when we find + * and update it. + */ + break; + } } + HDassert( ( chunkno == 0 ) || ( cont_updated ) ); /* Set return value */ - ret_value=idx; + *msg_idx = idx; + ret_value = TRUE; done: + FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc_extend_chunk() */ /*------------------------------------------------------------------------- - * Function: H5O_alloc_new_chunk + * Function: H5O_alloc_new_chunk * - * Purpose: Allocates a new chunk for the object header but doen't - * give the new chunk a file address yet. One of the other - * chunks will get an object continuation message. If there - * isn't room in any other chunk for the object continuation - * message, then some message from another chunk is moved into - * this chunk to make room. + * Purpose: Allocates a new chunk for the object header, including + * file space. * - * SIZE need not be aligned. + * One of the other chunks will get an object continuation + * message. If there isn't room in any other chunk for the + * object continuation message, then some message from + * another chunk is moved into this chunk to make room. * - * Return: Success: Index number of the null message for the - * new chunk. The null message will be at - * least SIZE bytes not counting the message - * ID or size fields. + * SIZE need not be aligned. * - * Failure: Negative + * Return: Success: Index number of the null message for the + * new chunk. The null message will be at + * least SIZE bytes not counting the message + * ID or size fields. * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 7 1997 + * Failure: Negative + * + * Programmer: Robb Matzke + * matzke@llnl.gov + * Aug 7 1997 * * Modifications: * + * John Mainzer, 8/17/05 + * Reworked function to allocate file space immediately, + * instead of just allocating core space (as it used to). + * This change was necessary, as we were allocating file + * space on metadata cache eviction, which need not be + * synchronized across all processes. As a result, + * different processes were allocating different file + * locations to the same chunk. + * *------------------------------------------------------------------------- */ static unsigned -H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) +H5O_alloc_new_chunk(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + size_t size) { - size_t cont_size; /*continuation message size */ - int found_null = (-1); /*best fit null message */ - int found_other = (-1); /*best fit other message */ - unsigned idx; /*message number */ - uint8_t *p = NULL; /*ptr into new chunk */ - H5O_cont_t *cont = NULL; /*native continuation message */ - int chunkno; - unsigned u; - unsigned ret_value; /*return value */ + size_t cont_size; /*continuation message size */ + int found_null = (-1); /*best fit null message */ + int found_other = (-1); /*best fit other message */ + unsigned idx; /*message number */ + uint8_t *p = NULL; /*ptr into new chunk */ + H5O_cont_t *cont = NULL; /*native continuation message */ + int chunkno; + unsigned u; + unsigned ret_value; /*return value */ + haddr_t new_chunk_addr; FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_new_chunk); /* check args */ - assert (oh); - assert (size > 0); + HDassert (oh); + HDassert (size > 0); size = H5O_ALIGN(size); /* @@ -3018,23 +3207,23 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) */ cont_size = H5O_ALIGN (H5F_SIZEOF_ADDR(f) + H5F_SIZEOF_SIZE(f)); for (u=0; unmesgs; u++) { - if (H5O_NULL_ID == oh->mesg[u].type->id) { - if (cont_size == oh->mesg[u].raw_size) { - found_null = u; - break; - } else if (oh->mesg[u].raw_size >= cont_size && - (found_null < 0 || - (oh->mesg[u].raw_size < - oh->mesg[found_null].raw_size))) { - found_null = u; - } - } else if (H5O_CONT_ID == oh->mesg[u].type->id) { - /*don't consider continuation messages */ - } else if (oh->mesg[u].raw_size >= cont_size && - (found_other < 0 || - oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) { - found_other = u; - } + if (H5O_NULL_ID == oh->mesg[u].type->id) { + if (cont_size == oh->mesg[u].raw_size) { + found_null = u; + break; + } else if (oh->mesg[u].raw_size >= cont_size && + (found_null < 0 || + (oh->mesg[u].raw_size < + oh->mesg[found_null].raw_size))) { + found_null = u; + } + } else if (H5O_CONT_ID == oh->mesg[u].type->id) { + /*don't consider continuation messages */ + } else if (oh->mesg[u].raw_size >= cont_size && + (found_other < 0 || + oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) { + found_other = u; + } } assert(found_null >= 0 || found_other >= 0); @@ -3043,36 +3232,47 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) * message, then make sure the new chunk has enough room for that * other message. */ - if (found_null < 0) - size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + if ( found_null < 0 ) { + + size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + } /* * The total chunk size must include the requested space plus enough - * for the message header. This must be at least some minimum and a + * for the message header. This must be at least some minimum and a * multiple of the alignment size. */ size = MAX(H5O_MIN_SIZE, size + H5O_SIZEOF_MSGHDR(f)); assert (size == H5O_ALIGN (size)); - /* Reserve space in the file to hold the new chunk */ - if( H5MF_reserve(f, (hsize_t)size) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file for new chunk"); + /* allocate space in file to hold the new chunk */ + new_chunk_addr = H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)size); + if ( HADDR_UNDEF == new_chunk_addr ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "unable to allocate space for new chunk"); + } /* - * Create the new chunk without giving it a file address. + * Create the new chunk giving it a file address. */ - if (oh->nchunks >= oh->alloc_nchunks) { + if ( oh->nchunks >= oh->alloc_nchunks ) { + unsigned na = oh->alloc_nchunks + H5O_NCHUNKS; H5O_chunk_t *x = H5FL_SEQ_REALLOC (H5O_chunk_t, oh->chunk, (size_t)na); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( !x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } oh->alloc_nchunks = na; oh->chunk = x; } + chunkno = oh->nchunks++; oh->chunk[chunkno].dirty = TRUE; - oh->chunk[chunkno].addr = HADDR_UNDEF; + oh->chunk[chunkno].addr = new_chunk_addr; oh->chunk[chunkno].size = size; if (NULL==(oh->chunk[chunkno].image = p = H5FL_BLK_CALLOC(chunk_image,size))) HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); @@ -3086,35 +3286,40 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) unsigned na = oh->alloc_nmesgs + MAX (H5O_NMESGS, 3); H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( !x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } oh->alloc_nmesgs = na; oh->mesg = x; /* Set new object header info to zeros */ HDmemset(&oh->mesg[old_alloc], 0, - (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); } /* * Describe the messages of the new chunk. */ if (found_null < 0) { - found_null = u = oh->nmesgs++; - oh->mesg[u].type = H5O_NULL; - oh->mesg[u].dirty = TRUE; - oh->mesg[u].native = NULL; - oh->mesg[u].raw = oh->mesg[found_other].raw; - oh->mesg[u].raw_size = oh->mesg[found_other].raw_size; - oh->mesg[u].chunkno = oh->mesg[found_other].chunkno; - - oh->mesg[found_other].dirty = TRUE; + found_null = u = oh->nmesgs++; + oh->mesg[u].type = H5O_NULL; + oh->mesg[u].dirty = TRUE; + oh->mesg[u].native = NULL; + oh->mesg[u].raw = oh->mesg[found_other].raw; + oh->mesg[u].raw_size = oh->mesg[found_other].raw_size; + oh->mesg[u].chunkno = oh->mesg[found_other].chunkno; + + oh->mesg[found_other].dirty = TRUE; /* Copy the message to the new location */ - HDmemcpy(p+H5O_SIZEOF_MSGHDR(f),oh->mesg[found_other].raw,oh->mesg[found_other].raw_size); - oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f); - oh->mesg[found_other].chunkno = chunkno; - p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; - size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + HDmemcpy(p + H5O_SIZEOF_MSGHDR(f), + oh->mesg[found_other].raw, + oh->mesg[found_other].raw_size); + oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f); + oh->mesg[found_other].chunkno = chunkno; + p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; } idx = oh->nmesgs++; oh->mesg[idx].type = H5O_NULL; @@ -3130,19 +3335,19 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) * two null messages. */ if (oh->mesg[found_null].raw_size > cont_size) { - u = oh->nmesgs++; - oh->mesg[u].type = H5O_NULL; - oh->mesg[u].dirty = TRUE; - oh->mesg[u].native = NULL; - oh->mesg[u].raw = oh->mesg[found_null].raw + - cont_size + - H5O_SIZEOF_MSGHDR(f); - oh->mesg[u].raw_size = oh->mesg[found_null].raw_size - - (cont_size + H5O_SIZEOF_MSGHDR(f)); - oh->mesg[u].chunkno = oh->mesg[found_null].chunkno; - - oh->mesg[found_null].dirty = TRUE; - oh->mesg[found_null].raw_size = cont_size; + u = oh->nmesgs++; + oh->mesg[u].type = H5O_NULL; + oh->mesg[u].dirty = TRUE; + oh->mesg[u].native = NULL; + oh->mesg[u].raw = oh->mesg[found_null].raw + + cont_size + + H5O_SIZEOF_MSGHDR(f); + oh->mesg[u].raw_size = oh->mesg[found_null].raw_size - + (cont_size + H5O_SIZEOF_MSGHDR(f)); + oh->mesg[u].chunkno = oh->mesg[found_null].chunkno; + + oh->mesg[found_null].dirty = TRUE; + oh->mesg[found_null].raw_size = cont_size; } /* @@ -3150,10 +3355,13 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) */ oh->mesg[found_null].type = H5O_CONT; oh->mesg[found_null].dirty = TRUE; - if (NULL==(cont = H5FL_MALLOC(H5O_cont_t))) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); - cont->addr = HADDR_UNDEF; - cont->size = 0; + if (NULL==(cont = H5FL_MALLOC(H5O_cont_t))) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + cont->addr = oh->chunk[chunkno].addr; + cont->size = oh->chunk[chunkno].size; cont->chunkno = chunkno; oh->mesg[found_null].native = cont; @@ -3162,59 +3370,75 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) done: FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc_new_chunk() */ /*------------------------------------------------------------------------- - * Function: H5O_alloc + * Function: H5O_alloc * - * Purpose: Allocate enough space in the object header for this message. + * Purpose: Allocate enough space in the object header for this message. * - * Return: Success: Index of message + * Return: Success: Index of message * - * Failure: Negative + * Failure: Negative * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 6 1997 + * Programmer: Robb Matzke + * matzke@llnl.gov + * Aug 6 1997 * * Modifications: * * John Mainzer, 6/7/05 * Modified function to use the new dirtied parameter to - * H5AC_unprotect() instead of modfying the is_dirty field. + * H5AC_unprotect() instead of modfying the is_dirty field directly. * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/19/05 + * Reworked the function to allocate disk space immediately instead + * of waiting to cache eviction time. This is necessary since cache + * evictions need not be synchronized across the processes in the + * PHDF5 case. + * + * Note the use of a revised versions of H5O_alloc_new_chunk() and + * H5O_alloc_extend_chunk(). + * *------------------------------------------------------------------------- */ static unsigned -H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * oh_flags_ptr) +H5O_alloc(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + const H5O_class_t *type, + size_t size, + unsigned * oh_flags_ptr) { - unsigned idx; + unsigned idx = UFAIL; H5O_mesg_t *msg; /* Pointer to newly allocated message */ - size_t aligned_size = H5O_ALIGN(size); - unsigned ret_value; /* Return value */ + size_t aligned_size = H5O_ALIGN(size); + htri_t tri_result; + unsigned ret_value; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5O_alloc); /* check args */ - assert (oh); - assert (type); - assert (oh_flags_ptr); + HDassert (oh); + HDassert (type); + HDassert (oh_flags_ptr); /* look for a null message which is large enough */ for (idx = 0; idx < oh->nmesgs; idx++) { - if (H5O_NULL_ID == oh->mesg[idx].type->id && + if (H5O_NULL_ID == oh->mesg[idx].type->id && oh->mesg[idx].raw_size >= aligned_size) - break; + break; } #ifdef LATER /* * Perhaps if we join adjacent null messages we could make one * large enough... we leave this as an exercise for future - * programmers :-) This isn't a high priority because when an + * programmers :-) This isn't a high priority because when an * object header is read from disk the null messages are combined * anyway. */ @@ -3222,63 +3446,92 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * /* if we didn't find one, then allocate more header space */ if (idx >= oh->nmesgs) { - unsigned chunkno; + unsigned chunkno; - /* - * Look for a chunk which hasn't had disk space allocated yet - * since we can just increase the size of that chunk. - */ - for (chunkno = 0; chunkno < oh->nchunks; chunkno++) { - if ((idx = H5O_alloc_extend_chunk(f, oh, chunkno, size)) != UFAIL) { - break; - } - H5E_clear_stack(NULL); - } + /* check to see if we can extend one of the chunks. If we can, + * do so. Otherwise, we will have to allocate a new chunk. + * + * Note that in this new version of this function, all chunks + * must have file space allocated to them. + */ + for ( chunkno = 0; chunkno < oh->nchunks; chunkno++ ) + { + HDassert( H5F_addr_defined(oh->chunk[chunkno].addr) ); - /* - * Create a new chunk - */ - if (idx == UFAIL) { - if ((idx = H5O_alloc_new_chunk(f, oh, size)) == UFAIL) - HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "unable to create a new object header data chunk"); - } + tri_result = H5O_alloc_extend_chunk(f, oh, chunkno, size, &idx); + + if ( tri_result == TRUE ) { + + break; + + } else if ( tri_result == FALSE ) { + + idx = UFAIL; + + } else { + + HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, UFAIL, \ + "H5O_alloc_extend_chunk failed unexpectedly"); + } + } + + /* if idx is still UFAIL, we were not able to extend a chunk. + * Create a new one. + */ + if (idx == UFAIL) { + + if ( (idx = H5O_alloc_new_chunk(f, dxpl_id, oh, size)) == UFAIL ) { + + HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, \ + "unable to create a new object header data chunk"); + } + } } /* Set pointer to newly allocated message */ - msg=&oh->mesg[idx]; + msg = &(oh->mesg[idx]); /* do we need to split the null message? */ if (msg->raw_size > aligned_size) { + H5O_mesg_t *null_msg; /* Pointer to null message */ - size_t mesg_size = aligned_size+ H5O_SIZEOF_MSGHDR(f); /* Total size of newly allocated message */ - assert(msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f)); + size_t mesg_size = aligned_size + H5O_SIZEOF_MSGHDR(f); + /* Total size of newly allocated message */ - if (oh->nmesgs >= oh->alloc_nmesgs) { - int old_alloc=oh->alloc_nmesgs; - unsigned na = oh->alloc_nmesgs + H5O_NMESGS; - H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); + HDassert( msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f) ); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); - oh->alloc_nmesgs = na; - oh->mesg = x; + if (oh->nmesgs >= oh->alloc_nmesgs) { + + int old_alloc=oh->alloc_nmesgs; + unsigned na = oh->alloc_nmesgs + H5O_NMESGS; + H5O_mesg_t *x = H5FL_SEQ_REALLOC(H5O_mesg_t, oh->mesg, (size_t)na); - /* Set new object header info to zeros */ - HDmemset(&oh->mesg[old_alloc],0, - (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + if (!x) { - /* "Retarget" local 'msg' pointer into newly allocated array of messages */ + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + oh->alloc_nmesgs = na; + oh->mesg = x; + + /* Set new object header info to zeros */ + HDmemset(&oh->mesg[old_alloc],0, + (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + + /* "Retarget" local 'msg' pointer into newly allocated array + * of messages + */ msg=&oh->mesg[idx]; - } - null_msg=&oh->mesg[oh->nmesgs++]; - null_msg->type = H5O_NULL; - null_msg->dirty = TRUE; - null_msg->native = NULL; - null_msg->raw = msg->raw + mesg_size; - null_msg->raw_size = msg->raw_size - mesg_size; - null_msg->chunkno = msg->chunkno; - msg->raw_size = aligned_size; + } + null_msg = &(oh->mesg[oh->nmesgs++]); + null_msg->type = H5O_NULL; + null_msg->dirty = TRUE; + null_msg->native = NULL; + null_msg->raw = msg->raw + mesg_size; + null_msg->raw_size = msg->raw_size - mesg_size; + null_msg->chunkno = msg->chunkno; + msg->raw_size = aligned_size; } /* initialize the new message */ @@ -3289,11 +3542,12 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * *oh_flags_ptr |= H5AC__DIRTIED_FLAG; /* Set return value */ - ret_value=idx; + ret_value = idx; done: FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc() */ #ifdef NOT_YET @@ -3956,7 +4210,7 @@ H5O_iterate_real(const H5G_entry_t *ent, const H5O_class_t *type, H5AC_protect_t if(oh_flags & H5AC__DIRTIED_FLAG) { /* Shouldn't be able to modify object header if we don't have write access */ HDassert(prot == H5AC_WRITE); - H5O_touch_oh(ent->file, oh, FALSE, &oh_flags); + H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags); } /* end if */ done: diff --git a/src/H5Oprivate.h b/src/H5Oprivate.h index 8a9dd7d..e3043af 100644 --- a/src/H5Oprivate.h +++ b/src/H5Oprivate.h @@ -268,11 +268,11 @@ H5_DLL herr_t H5O_unprotect(H5G_entry_t *ent, struct H5O_t *oh, hid_t dxpl_id, H5_DLL int H5O_append(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, unsigned type_id, unsigned flags, const void *mesg, unsigned * oh_flags_ptr); H5_DLL herr_t H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id); -H5_DLL herr_t H5O_touch_oh(H5F_t *f, struct H5O_t *oh, hbool_t force, - unsigned * oh_flags_ptr); +H5_DLL herr_t H5O_touch_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, + hbool_t force, unsigned * oh_flags_ptr); #ifdef H5O_ENABLE_BOGUS H5_DLL herr_t H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id); -H5_DLL herr_t H5O_bogus_oh(H5F_t *f, struct H5O_t *oh, +H5_DLL herr_t H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, unsigned * oh_flags_ptr); #endif /* H5O_ENABLE_BOGUS */ H5_DLL herr_t H5O_remove(H5G_entry_t *ent, unsigned type_id, int sequence, diff --git a/test/cache.c b/test/cache.c index 1ee2c22..84943ee 100644 --- a/test/cache.c +++ b/test/cache.c @@ -1767,7 +1767,10 @@ setup_cache(size_t max_cache_size, min_clean_size, (NUMBER_OF_ENTRY_TYPES - 1), (const char **)entry_type_names, - check_write_permitted); + check_write_permitted, + TRUE, + NULL, + NULL); if ( cache_ptr == NULL ) { @@ -2186,6 +2189,10 @@ protect_entry(H5C_t * cache_ptr, * Modified function to use the new dirtied parameter of * H5C_unprotect(). * + * JRM -- 9/8/05 + * Update for new entry size parameter in H5C_unprotect(). + * We don't use them here for now. + * *------------------------------------------------------------------------- */ @@ -2226,7 +2233,7 @@ unprotect_entry(H5C_t * cache_ptr, result = H5C_unprotect(NULL, -1, -1, cache_ptr, &(types[type]), entry_ptr->addr, (void *)entry_ptr, - flags); + flags, 0); if ( ( result < 0 ) || ( entry_ptr->header.is_protected ) || @@ -8530,6 +8537,10 @@ check_double_protect_err(void) * Modified function to use the new dirtied parameter in * H5C_unprotect(). * + * JRM -- 9/8/05 + * Updated function for the new size change parameter in + * H5C_unprotect(). We don't use them for now. + * *------------------------------------------------------------------------- */ @@ -8567,8 +8578,8 @@ check_double_unprotect_err(void) if ( pass ) { result = H5C_unprotect(NULL, -1, -1, cache_ptr, &(types[0]), - entry_ptr->addr, (void *)entry_ptr, - H5C__NO_FLAGS_SET); + entry_ptr->addr, (void *)entry_ptr, + H5C__NO_FLAGS_SET, 0); if ( result > 0 ) { diff --git a/testpar/t_mdset.c b/testpar/t_mdset.c index 9b268b6..225aa8a 100644 --- a/testpar/t_mdset.c +++ b/testpar/t_mdset.c @@ -439,7 +439,7 @@ void big_dataset(void) /* Check that file of the correct size was created */ file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); #ifndef WIN32 - VRFY((file_size == 2147485696ULL), "File is correct size"); + VRFY((file_size == 2147485696ULL), "File is correct size (~2GB)"); #endif /* @@ -470,7 +470,7 @@ void big_dataset(void) /* Check that file of the correct size was created */ file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); #ifndef WIN32 - VRFY((file_size == 4294969344ULL), "File is correct size"); + VRFY((file_size == 4294969344ULL), "File is correct size (~4GB)"); #endif /* @@ -501,7 +501,7 @@ void big_dataset(void) /* Check that file of the correct size was created */ file_size=h5_mpi_get_file_size(filename, MPI_COMM_WORLD, MPI_INFO_NULL); #ifndef WIN32 - VRFY((file_size == 8589936640ULL), "File is correct size"); + VRFY((file_size == 8589936640ULL), "File is correct size (~8GB)"); #endif /* Close fapl */ -- cgit v0.12