diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/H5AC.c | 2106 | ||||
-rw-r--r-- | src/H5ACprivate.h | 4 | ||||
-rw-r--r-- | src/H5C.c | 1053 | ||||
-rw-r--r-- | src/H5Cpkg.h | 39 | ||||
-rw-r--r-- | src/H5Cprivate.h | 71 | ||||
-rw-r--r-- | src/H5D.c | 2 | ||||
-rw-r--r-- | src/H5F.c | 62 | ||||
-rw-r--r-- | src/H5FD.c | 2 | ||||
-rw-r--r-- | src/H5FDmpio.c | 38 | ||||
-rw-r--r-- | src/H5FDmpiposix.c | 34 | ||||
-rw-r--r-- | src/H5FDmulti.c | 9 | ||||
-rw-r--r-- | src/H5Fprivate.h | 1 | ||||
-rw-r--r-- | src/H5HL.c | 189 | ||||
-rw-r--r-- | src/H5HLpkg.h | 1 | ||||
-rw-r--r-- | src/H5MF.c | 8 | ||||
-rw-r--r-- | src/H5MFprivate.h | 2 | ||||
-rw-r--r-- | src/H5O.c | 746 | ||||
-rw-r--r-- | src/H5Oprivate.h | 6 |
18 files changed, 3972 insertions, 401 deletions
@@ -42,18 +42,24 @@ *------------------------------------------------------------------------- */ +#define H5C_PACKAGE /*suppress error about including H5Cpkg */ #define H5F_PACKAGE /*suppress error about including H5Fpkg */ /* Interface initialization */ #define H5_INTERFACE_INIT_FUNC H5AC_init_interface +#ifdef H5_HAVE_PARALLEL +#include <mpi.h> +#endif /* H5_HAVE_PARALLEL */ #include "H5private.h" /* Generic Functions */ #include "H5ACprivate.h" /* Metadata cache */ +#include "H5Cpkg.h" /* Cache */ #include "H5Dprivate.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5FDprivate.h" /* File drivers */ +#include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ #include "H5Pprivate.h" /* Property lists */ @@ -62,6 +68,299 @@ #include "H5FPprivate.h" /* Flexible PHDF5 */ #endif /* H5_HAVE_FPHDF5 */ +#define H5AC_DEBUG_DIRTY_BYTES_CREATION 0 + +/**************************************************************************** + * + * structure H5AC_aux_t + * + * While H5AC has become a wrapper for the cache implemented in H5C.c, there + * are some features of the metadata cache that are specific to it, and which + * therefore do not belong in the more generic H5C cache code. + * + * In particular, there is the matter of synchronizing writes from the + * metadata cache to disk in the PHDF5 case. + * + * Prior to this update, the presumption was that all metadata caches would + * write the same data at the same time since all operations modifying + * metadata must be performed collectively. Given this assumption, it was + * safe to allow only the writes from process 0 to actually make it to disk, + * while metadata writes from all other processes were discarded. + * + * Unfortunately, this presumption is in error as operations that read + * metadata need not be collective, but can change the location of dirty + * entries in the metadata cache LRU lists. This can result in the same + * metadata write operation triggering writes from the metadata caches on + * some processes, but not all (causing a hang), or in different sets of + * entries being written from different caches (potentially resulting in + * metadata corruption in the file). + * + * To deal with this issue, I decided to apply a paradigm shift to the way + * metadata is written to disk. + * + * With this set of changes, only the metadata cache on process 0 is able + * to write metadata to disk, although metadata caches on all other + * processes can read metadata from disk as before. + * + * To keep all the other caches from getting plugged up with dirty metadata, + * process 0 periodically broadcasts a list of entries that it has flushed + * since that last notice, and which are currently clean. The other caches + * mark these entries as clean as well, which allows them to evict the + * entries as needed. + * + * One obvious problem in this approach is synchronizing the broadcasts + * and receptions, as different caches may see different amounts of + * activity. + * + * The current solution is for the caches to track the number of bytes + * of newly generated dirty metadata, and to broadcast and receive + * whenever this value exceeds some user specified threshold. + * + * Maintaining this count is easy for all processes not on process 0 -- + * all that is necessary is to add the size of the entry to the total + * whenever there is an insertion, a rename of a previously clean entry, + * or whever a previously clean entry is marked dirty in an unprotect. + * + * On process 0, we have to be careful not to count dirty bytes twice. + * If an entry is marked dirty, flushed, and marked dirty again, all + * within a single reporting period, it only th first marking should + * be added to the dirty bytes generated tally, as that is all that + * the other processes will see. + * + * At present, this structure exists to maintain the fields needed to + * implement the above scheme, and thus is only used in the parallel + * case. However, other uses may arise in the future. + * + * Instance of this structure are associated with metadata caches via + * the aux_ptr field of H5C_t (see H5Cpkg.h). The H5AC code is + * responsible for allocating, maintaining, and discarding instances + * of H5AC_aux_t. + * + * The remainder of this header comments documents the individual fields + * of the structure. + * + * JRM - 6/27/05 + * + * magic: Unsigned 32 bit integer always set to + * H5AC__H5AC_AUX_T_MAGIC. This field is used to validate + * pointers to instances of H5AC_aux_t. + * + * mpi_comm: MPI communicator associated with the file for which the + * cache has been created. + * + * mpi_rank: MPI rank of this process within mpi_comm. + * + * mpi_size: Number of processes in mpi_comm. + * + * write_permitted: Boolean flag used to control whether the cache + * is permitted to write to file. + * + * dirty_bytes_threshold: Integer field containing the dirty bytes + * generation threashold. Whenever dirty byte creation + * exceeds this value, the metadata cache on process 0 + * broadcasts a list of the entries it has flushed since + * the last broadcast (or since the beginning of execution) + * and which are currently clean (if they are still in the + * cache) + * + * Similarly, metadata caches on processes other than process + * 0 will attempt to receive a list of clean entries whenever + * the threshold is exceeded. + * + * dirty_bytes: Integer field containing the number of bytes of dirty + * metadata generated since the beginning of the computation, + * or (more typically) since the last clean entries list + * broadcast. This field is reset to zero after each such + * broadcast. + * + * dirty_bytes_propagations: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times the cleaned list + * has been propagated from process 0 to the other + * processes. + * + * unprotect_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via unprotect operations since the last time the cleaned + * list was propagated. + * + * unprotect_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via unprotect operations since the last time + * the cleaned list was propagated. + * + * insert_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via insert operations since the last time the cleaned + * list was propagated. + * + * insert_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via insert operations since the last time + * the cleaned list was propagated. + * + * rename_dirty_bytes: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of dirty bytes created + * via rename operations since the last time the cleaned + * list was propagated. + * + * rename_dirty_bytes_updates: This field only exists when the + * H5AC_DEBUG_DIRTY_BYTES_CREATION #define is TRUE. + * + * It is used to track the number of times dirty bytes have + * been created via rename operations since the last time + * the cleaned list was propagated. + * + * d_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list + * of entries that have been dirtied since the last time they + * were listed in a clean entries broadcast. This list is + * only maintained by the metadata cache on process 0 -- it + * it used to maintain a view of the dirty entries as seen + * by the other caches, so as to keep the dirty bytes count + * in synchronization with them. + * + * Thus on process 0, the dirty_bytes count is incremented + * only if either + * + * 1) an entry is inserted in the metadata cache, or + * + * 2) a previously clean entry is renamed, and it does not + * already appear in the dirty entry list, or + * + * 3) a previously clean entry is unprotected with the + * dirtied flag set and the entry does not already appear + * in the dirty entry list. + * + * Entries are added to the dirty entry list whever they cause + * the dirty bytes count to be increased. They are removed + * when they appear in a clean entries broadcast. Note that + * renames must be reflected in the dirty entry list. + * + * To reitterate, this field is only used on process 0 -- it + * should be NULL on all other processes. + * + * d_slist_len: Integer field containing the number of entries in the + * dirty entry list. This field should always contain the + * value 0 on all processes other than process 0. It exists + * primarily for sanity checking. + * + * c_slist_ptr: Pointer to an instance of H5SL_t used to maintain a list + * of entries that were dirty, have been flushed + * to disk since the last clean entries broadcast, and are + * still clean. Since only process 0 can write to disk, this + * list only exists on process 0. + * + * In essence, this slist is used to assemble the contents of + * the next clean entries broadcast. The list emptied after + * each broadcast. + * + * c_slist_len: Integer field containing the number of entries in the clean + * entries list (*c_slist_ptr). This field should always + * contain the value 0 on all processes other than process 0. + * It exists primarily for sanity checking. + * + ****************************************************************************/ + +#ifdef H5_HAVE_PARALLEL + +#define H5AC__H5AC_AUX_T_MAGIC (unsigned)0x00D0A01 + +typedef struct H5AC_aux_t +{ + uint32_t magic; + + MPI_Comm mpi_comm; + + int mpi_rank; + + int mpi_size; + + hbool_t write_permitted; + + int32_t dirty_bytes_threshold; + + int32_t dirty_bytes; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + + int32_t dirty_bytes_propagations; + + int32_t unprotect_dirty_bytes; + int32_t unprotect_dirty_bytes_updates; + + int32_t insert_dirty_bytes; + int32_t insert_dirty_bytes_updates; + + int32_t rename_dirty_bytes; + int32_t rename_dirty_bytes_updates; + +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + H5SL_t * d_slist_ptr; + + int32_t d_slist_len; + + H5SL_t * c_slist_ptr; + + int32_t c_slist_len; + +} H5AC_aux_t; /* struct H5AC_aux_t */ + +/* Declare a free list to manage the H5AC_aux_t struct */ +H5FL_DEFINE_STATIC(H5AC_aux_t); + +#endif /* H5_HAVE_PARALLEL */ + +/**************************************************************************** + * + * structure H5AC_slist_entry_t + * + * The dirty entry list maintained via the d_slist_ptr field of H5AC_aux_t + * and the cleaned entry list maintained via the c_slist_ptr field of + * H5AC_aux_t are just lists of the file offsets of the dirty/cleaned + * entries. Unfortunately, the slist code makes us define a dynamically + * allocated structure to store these offsets in. This structure serves + * that purpose. Its fields are as follows: + * + * magic: Unsigned 32 bit integer always set to + * H5AC__H5AC_SLIST_ENTRY_T_MAGIC. This field is used to + * validate pointers to instances of H5AC_slist_entry_t. + * + * addr: file offset of a metadata entry. Entries are added to this + * list (if they aren't there already) when they are marked + * dirty in an unprotect, inserted, or renamed. They are + * removed when they appear in a clean entries broadcast. + * + ****************************************************************************/ + +#ifdef H5_HAVE_PARALLEL + +#define H5AC__H5AC_SLIST_ENTRY_T_MAGIC 0x00D0A02 + +typedef struct H5AC_slist_entry_t +{ + uint32_t magic; + + haddr_t addr; +} H5AC_slist_entry_t; + +/* Declare a free list to manage the H5AC_slist_entry_t struct */ +H5FL_DEFINE_STATIC(H5AC_slist_entry_t); + +#endif /* H5_HAVE_PARALLEL */ + + /* * Private file-scope variables. */ @@ -90,6 +389,55 @@ static herr_t H5AC_check_if_write_permitted(const H5F_t *f, hid_t dxpl_id, hbool_t * write_permitted_ptr); +#ifdef H5_HAVE_PARALLEL +static herr_t H5AC_broadcast_clean_list(H5AC_t * cache_ptr); + +static herr_t H5AC_log_deleted_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + unsigned int flags); + +static herr_t H5AC_log_dirtied_entry(H5AC_t * cache_ptr, + H5C_cache_entry_t * entry_ptr, + haddr_t addr, + hbool_t size_changed, + size_t new_size); + +static herr_t H5AC_log_flushed_entry(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); + +#if 0 /* this is useful debugging code -- JRM */ +static herr_t H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); +#endif /* JRM */ + +static herr_t H5AC_log_inserted_entry(H5F_t * f, + H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + const H5AC_class_t * type, + haddr_t addr); + +static herr_t H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr, + hbool_t do_barrier); + +static herr_t H5AC_receive_and_apply_clean_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5AC_t * cache_ptr); + +static herr_t H5AC_log_renamed_entry(H5AC_t * cache_ptr, + haddr_t old_addr, + haddr_t new_addr); +#endif /* H5_HAVE_PARALLEL */ + /*------------------------------------------------------------------------- * Function: H5AC_init @@ -351,6 +699,13 @@ H5AC_term_interface(void) * through the function. * JRM - 4/7/05 * + * Added code allocating and initializing the auxilary + * structure (an instance of H5AC_aux_t), and linking it + * to the instance of H5C_t created by H5C_create(). At + * present, the auxilary structure is only used in PHDF5. + * + * JRM - 6/28/05 + * *------------------------------------------------------------------------- */ @@ -376,6 +731,12 @@ H5AC_create(const H5F_t *f, { herr_t ret_value = SUCCEED; /* Return value */ herr_t result; +#ifdef H5_HAVE_PARALLEL + MPI_Comm mpi_comm = MPI_COMM_NULL; + int mpi_rank = -1; + int mpi_size = -1; + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_create, FAIL) @@ -390,15 +751,138 @@ H5AC_create(const H5F_t *f, HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Bad cache configuration"); } - /* The default max cache size and min clean size will frequently be - * overwritten shortly by the subsequent set resize config call. - * -- JRM - */ - f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, - H5AC__DEFAULT_MIN_CLEAN_SIZE, - (H5AC_NTYPES - 1), - (const char **)H5AC_entry_type_names, - H5AC_check_if_write_permitted); +#ifdef H5_HAVE_PARALLEL + if ( IS_H5FD_MPI(f) ) { + + if ( (mpi_comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, \ + "can't get MPI communicator") + } + + if ( (mpi_rank = H5F_mpi_get_rank(f)) < 0 ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi rank") + } + + if ( (mpi_size = H5F_mpi_get_size(f)) < 0 ) { + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get mpi size") + } + + /* There is no point in setting up the auxilary structure if size + * is less than or equal to 1, as there will never be any processes + * to broadcast the clean lists to. + */ + if ( mpi_size > 1 ) { + + if ( NULL == (aux_ptr = H5FL_CALLOC(H5AC_aux_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate H5AC auxilary structure.") + + } else { + + aux_ptr->magic = H5AC__H5AC_AUX_T_MAGIC; + aux_ptr->mpi_comm = mpi_comm; + aux_ptr->mpi_rank = mpi_rank; + aux_ptr->mpi_size = mpi_size; + aux_ptr->write_permitted = FALSE; + aux_ptr->dirty_bytes_threshold = 256 * 1024; + aux_ptr->dirty_bytes = 0; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations = 0; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + aux_ptr->d_slist_ptr = NULL; + aux_ptr->d_slist_len = 0; + aux_ptr->c_slist_ptr = NULL; + aux_ptr->c_slist_len = 0; + } + + if ( mpi_rank == 0 ) { + + aux_ptr->d_slist_ptr = + H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16); + + if ( aux_ptr->d_slist_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, + "can't create dirtied entry list.") + } + + aux_ptr->c_slist_ptr = + H5SL_create(H5SL_TYPE_HADDR,0.5,(size_t)16); + + if ( aux_ptr->c_slist_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, + "can't create cleaned entry list.") + } + } + } + + if ( aux_ptr != NULL ) { + + if ( aux_ptr->mpi_rank == 0 ) { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + H5AC_log_flushed_entry, + (void *)aux_ptr); + + } else { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + NULL, + FALSE, +#if 0 /* this is useful debugging code -- keep it for a while */ /* JRM */ + H5AC_log_flushed_entry_dummy, +#else /* JRM */ + NULL, +#endif /* JRM */ + (void *)aux_ptr); + } + } else { + + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + NULL, + NULL); + } + } else { +#endif /* H5_HAVE_PARALLEL */ + /* The default max cache size and min clean size will frequently be + * overwritten shortly by the subsequent set resize config call. + * -- JRM + */ + f->shared->cache = H5C_create(H5AC__DEFAULT_MAX_CACHE_SIZE, + H5AC__DEFAULT_MIN_CLEAN_SIZE, + (H5AC_NTYPES - 1), + (const char **)H5AC_entry_type_names, + H5AC_check_if_write_permitted, + TRUE, + NULL, + NULL); +#ifdef H5_HAVE_PARALLEL + } +#endif /* H5_HAVE_PARALLEL */ if ( NULL == f->shared->cache ) { @@ -416,6 +900,31 @@ H5AC_create(const H5F_t *f, done: +#ifdef H5_HAVE_PARALLEL + + /* if there is a failure, try to tidy up the auxilary structure */ + + if ( ret_value != SUCCEED ) { + + if ( aux_ptr != NULL ) { + + if ( aux_ptr->d_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->d_slist_ptr); + } + + if ( aux_ptr->c_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->c_slist_ptr); + } + + aux_ptr->magic = 0; + H5FL_FREE(H5AC_aux_t, aux_ptr); + aux_ptr = NULL; + } + } +#endif /* H5_HAVE_PARALLEL */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5AC_create() */ @@ -445,6 +954,10 @@ done: * * JRM - 6/7/04 * + * Added code to free the auxiliary structure and its + * associated slist if present. + * JRM - 6/28/05 + * *------------------------------------------------------------------------- */ herr_t @@ -452,12 +965,23 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) { H5AC_t *cache = NULL; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_dest, FAIL) assert(f); assert(f->shared->cache); cache = f->shared->cache; +#ifdef H5_HAVE_PARALLEL + aux_ptr = cache->aux_ptr; + + if ( aux_ptr != NULL ) { + + HDassert ( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + } +#endif /* H5_HAVE_PARALLEL */ f->shared->cache = NULL; @@ -466,6 +990,25 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "can't destroy cache") } +#ifdef H5_HAVE_PARALLEL + if ( aux_ptr != NULL ) { + + if ( aux_ptr->d_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->d_slist_ptr); + } + + if ( aux_ptr->c_slist_ptr != NULL ) { + + H5SL_close(aux_ptr->c_slist_ptr); + } + + aux_ptr->magic = 0; + H5FL_FREE(H5AC_aux_t, aux_ptr); + aux_ptr = NULL; + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -529,28 +1072,103 @@ done: * * Complete re-write. See above for details. -- JRM 5/11/04 * - * Abstracted the guts of the function to H5C_dest() in H5C.c, - * and then re-wrote the function as a wrapper for H5C_dest(). + * Abstracted the guts of the function to H5C_flush_cache() + * in H5C.c, and then re-wrote the function as a wrapper for + * H5C_flush_cache(). * * JRM - 6/7/04 * + * JRM - 7/5/05 + * Modified function as part of a fix for a cache coherency + * bug in PHDF5. See the header comments on the H5AC_aux_t + * structure for details. + * *------------------------------------------------------------------------- */ herr_t H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags) { - herr_t status; - herr_t ret_value=SUCCEED; /* Return value */ + herr_t status; + herr_t ret_value = SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; + int mpi_code; +#endif /* H5_HAVE_PARALLEL */ + FUNC_ENTER_NOAPI(H5AC_flush, FAIL) HDassert(f); HDassert(f->shared->cache); - status = H5C_flush_cache(f, - dxpl_id, - H5AC_noblock_dxpl_id, - f->shared->cache, +#ifdef H5_HAVE_PARALLEL + aux_ptr = f->shared->cache->aux_ptr; + + if ( aux_ptr != NULL ) { + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + HDfprintf(stdout, + "%d::H5AC_flush: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", + (int)(aux_ptr->mpi_rank), + (int)(aux_ptr->unprotect_dirty_bytes), + (int)(aux_ptr->unprotect_dirty_bytes_updates), + (int)(aux_ptr->insert_dirty_bytes), + (int)(aux_ptr->insert_dirty_bytes_updates), + (int)(aux_ptr->rename_dirty_bytes), + (int)(aux_ptr->rename_dirty_bytes_updates)); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + /* to prevent "messages from the future" we must synchronize all + * processes before we start the flush. Hence the following + * barrier. + */ + if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) + } + + /* if the clear only flag is set, this flush will not involve any + * disk I/O. In such cases, it is not necessary to let process 0 + * flush first. + */ + if ( ( aux_ptr->mpi_rank == 0 ) && + ( (flags & H5AC__FLUSH_CLEAR_ONLY_FLAG) != 0 ) ) { + + unsigned init_flush_flags = H5AC__NO_FLAGS_SET; + + if ( ( (flags & H5AC__FLUSH_MARKED_ENTRIES_FLAG) != 0 ) && + ( (flags & H5AC__FLUSH_INVALIDATE_FLAG) == 0 ) ) { + + init_flush_flags |= H5AC__FLUSH_MARKED_ENTRIES_FLAG; + } + + aux_ptr->write_permitted = TRUE; + + status = H5C_flush_cache(f, + H5AC_noblock_dxpl_id, + H5AC_noblock_dxpl_id, + f->shared->cache, + init_flush_flags); + + aux_ptr->write_permitted = FALSE; + + if ( status < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + } + } /* end if ( aux_ptr->mpi_rank == 0 ) */ + + status = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + FALSE); + } /* end if ( aux_ptr != NULL ) */ +#endif /* H5_HAVE_PARALLEL */ + + status = H5C_flush_cache(f, + dxpl_id, + H5AC_noblock_dxpl_id, + f->shared->cache, flags); if ( status < 0 ) { @@ -616,8 +1234,16 @@ done: * moving management of the dirty flag on cache entries into * the cache code. * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * *------------------------------------------------------------------------- */ + herr_t H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void *thing, unsigned int flags) { @@ -625,6 +1251,9 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * H5AC_info_t *info; H5AC_t *cache; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_set, FAIL) @@ -717,6 +1346,23 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) { + + result = H5AC_log_inserted_entry(f, + f->shared->cache, + (H5AC_info_t *)thing, + type, + addr); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ + "H5AC_log_inserted_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_insert_entry(f, dxpl_id, H5AC_noblock_dxpl_id, @@ -731,6 +1377,22 @@ H5AC_set(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, void * HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "H5C_insert_entry() failed") } +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { + + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -766,6 +1428,18 @@ done: * in H5C.c, and then re-wrote the function as a wrapper for * H5C_rename_entry(). * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * + * Note that this code presumes that the renamed entry will + * be present in all caches -- which it must be at present. + * To maintain this invarient, only rename entries immediately + * after you unprotect them. + * *------------------------------------------------------------------------- */ herr_t @@ -773,6 +1447,9 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad { herr_t result; herr_t ret_value=SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_rename, FAIL) @@ -808,6 +1485,13 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad * In any case, don't check this code in without revisiting this * issue. * JRM -- 6/6/05 + * + * On reflection, the code was already broken, as there was no + * way to advise the SAP that a renamed entry had changed its + * address, or was dirty. I will not worry about it for now, + * but the matter must be addressed if we ever get serious + * about FPHDF5. + * JRM -- 7/5/05 */ HGOTO_DONE(SUCCEED); @@ -816,6 +1500,21 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) { + + result = H5AC_log_renamed_entry(f->shared->cache, + old_addr, + new_addr); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_renamed_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_rename_entry(f->shared->cache, type, old_addr, @@ -827,6 +1526,22 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_ad "H5C_rename_entry() failed.") } +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { + + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -837,7 +1552,7 @@ done: /*------------------------------------------------------------------------- * Function: H5AC_protect * - * Purpose: If the target entry is not in the cache, load it. If + * Purpose: If the target entry is not in the cache, load it. If * necessary, attempt to evict one or more entries to keep * the cache within its maximum size. * @@ -845,8 +1560,8 @@ done: * to the caller. The caller must call H5AC_unprotect() when * finished with the entry. * - * While it is protected, the entry may not be either evicted - * or flushed -- nor may it be accessed by another call to + * While it is protected, the entry may not be either evicted + * or flushed -- nor may it be accessed by another call to * H5AC_protect. Any attempt to do so will result in a failure. * * This comment is a re-write of the original Purpose: section. @@ -887,8 +1602,8 @@ done: * Purpose section above. * * JRM - 6/7/04 - * Abstracted the guts of the function to H5C_protect() - * in H5C.c, and then re-wrote the function as a wrapper for + * Abstracted the guts of the function to H5C_protect() + * in H5C.c, and then re-wrote the function as a wrapper for * H5C_protect(). * *------------------------------------------------------------------------- @@ -1108,6 +1823,18 @@ done: * part of a collection of changes directed at moving * management of cache entry dirty flags into the H5C code. * + * JRM - 7/5/05 + * Added code to track dirty byte generation, and to trigger + * clean entry list propagation when it exceeds a user + * specified threshold. Note that this code only applies in + * the PHDF5 case. It should have no effect on either the + * serial or FPHSD5 cases. + * + * JRM - 9/8/05 + * Added code to track entry size changes. This is necessary + * as it can effect dirty byte creation counts, thereby + * throwing the caches out of sync in the PHDF5 case. + * *------------------------------------------------------------------------- */ herr_t @@ -1116,6 +1843,12 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, { herr_t result; herr_t ret_value=SUCCEED; /* Return value */ + hbool_t size_changed = FALSE; + hbool_t dirtied; + size_t new_size = 0; +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ FUNC_ENTER_NOAPI(H5AC_unprotect, FAIL) @@ -1129,6 +1862,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, HDassert( ((H5AC_info_t *)thing)->addr == addr ); HDassert( ((H5AC_info_t *)thing)->type == type ); + dirtied = ((flags & H5AC__DIRTIED_FLAG) == H5AC__DIRTIED_FLAG ); + + if ( dirtied ) { + + if ( (type->size)(f, thing, &new_size) < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ + "Can't get size of thing") + } + + if ( ((H5AC_info_t *)thing)->size != new_size ) { + + size_changed = TRUE; + flags = flags | H5AC__SIZE_CHANGED_FLAG; + } + } + #ifdef H5_HAVE_PARALLEL #ifdef H5_HAVE_FPHDF5 /* The following code to support flexible parallel is a direct copy @@ -1202,6 +1952,40 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, #endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ +#ifdef H5_HAVE_PARALLEL + if ( ( dirtied ) && ( ((H5AC_info_t *)thing)->is_dirty == FALSE ) && + ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) ) { + + result = H5AC_log_dirtied_entry(f->shared->cache, + (H5AC_info_t *)thing, + addr, + size_changed, + new_size); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_dirtied_entry() failed.") + } + } + + if ( ( (flags & H5C__DELETED_FLAG) != 0 ) && + ( NULL != (aux_ptr = f->shared->cache->aux_ptr) ) && + ( aux_ptr->mpi_rank == 0 ) ) { + + result = H5AC_log_deleted_entry(f->shared->cache, + (H5AC_info_t *)thing, + addr, + flags); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "H5AC_log_deleted_entry() failed.") + } + } +#endif /* H5_HAVE_PARALLEL */ + result = H5C_unprotect(f, dxpl_id, H5AC_noblock_dxpl_id, @@ -1209,7 +1993,8 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, type, addr, thing, - flags); + flags, + new_size); if ( result < 0 ) { @@ -1217,6 +2002,23 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr, "H5C_unprotect() failed.") } +#ifdef H5_HAVE_PARALLEL + if ( ( aux_ptr != NULL ) && + ( aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold ) ) { + + result = H5AC_propagate_flushed_and_still_clean_entries_list(f, + H5AC_noblock_dxpl_id, + f->shared->cache, + TRUE); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "Can't propagate clean entries list.") + } + } +#endif /* H5_HAVE_PARALLEL */ + done: FUNC_LEAVE_NOAPI(ret_value) @@ -1686,6 +2488,184 @@ done: /*------------------------------------------------------------------------- * + * Function: H5AC_broadcast_clean_list() + * + * Purpose: Broadcast the contents of the process 0 cleaned entry + * slist. In passing, also remove all entries from said + * list, and also remove any matching entries from the dirtied + * slist. + * + * This function must only be called by the process with + * MPI_rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/1/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_broadcast_clean_list(H5AC_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + haddr_t addr; + H5AC_aux_t * aux_ptr = NULL; + H5SL_node_t * slist_node_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + MPI_Offset * buf_ptr = NULL; + size_t buf_size; + int i = 0; + int mpi_result; + int num_entries; + + FUNC_ENTER_NOAPI(H5AC_broadcast_clean_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + HDassert( H5SL_count(aux_ptr->c_slist_ptr) == + (size_t)(aux_ptr->c_slist_len) ); + + + /* First broadcast the number of entries in the list so that the + * receives can set up a buffer to receive them. If there aren't + * any, we are done. + */ + num_entries = aux_ptr->c_slist_len; + + mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) + + } + + if ( num_entries > 0 ) + { + /* allocate a buffer to store the list of entry base addresses in */ + + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + + buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size); + + if ( buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for clean entry buffer") + } + + /* now load the entry base addresses into the buffer, emptying the + * cleaned entry list in passing + */ + + while ( NULL != (slist_node_ptr = H5SL_first(aux_ptr->c_slist_ptr) ) ) + { + slist_entry_ptr = H5SL_item(slist_node_ptr); + + HDassert(slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + + HDassert( i < num_entries ); + + addr = slist_entry_ptr->addr; + + if ( H5FD_mpi_haddr_to_MPIOff(addr, &(buf_ptr[i])) < 0 ) { + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ + "can't convert from haddr to MPI off") + } + + i++; + + /* now remove the entry from the cleaned entry list */ + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + + /* and also remove the matching entry from the dirtied list + * if it exists. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); + } + + } /* while */ + + + /* Now broadcast the list of cleaned entries -- if there is one. + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + + mpi_result = MPI_Bcast((void *)buf_ptr, (int)buf_size, MPI_BYTE, 0, + aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) + } + } + +done: + + if ( buf_ptr != NULL ) { + + buf_ptr = (MPI_Offset *)H5MM_xfree((void *)buf_ptr); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_broadcast_clean_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * * Function: H5AC_check_if_write_permitted * * Purpose: Determine if a write is permitted under the current @@ -1703,6 +2683,16 @@ done: * * Modifications: * + * John Mainzer, 9/23/05 + * Rewrote function to return the value of the + * write_permitted field in aux structure if the structure + * exists and mpi_rank is 0. + * + * If the aux structure exists, but mpi_rank isn't 0, the + * function now returns FALSE. + * + * In all other cases, the function returns TRUE. + * *------------------------------------------------------------------------- */ @@ -1720,49 +2710,1087 @@ H5AC_check_if_write_permitted(const H5F_t UNUSED * f, { hbool_t write_permitted = TRUE; herr_t ret_value = SUCCEED; /* Return value */ +#ifdef H5_HAVE_PARALLEL + H5AC_aux_t * aux_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ + FUNC_ENTER_NOAPI(H5AC_check_if_write_permitted, FAIL) #ifdef H5_HAVE_PARALLEL + HDassert( f != NULL ); + HDassert( f->shared != NULL ); + HDassert( f->shared->cache != NULL ); - if ( IS_H5FD_MPI(f) ) { + aux_ptr = (H5AC_aux_t *)(f->shared->cache->aux_ptr); + + if ( aux_ptr != NULL ) { + + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); - H5P_genplist_t *dxpl; /* Dataset transfer property list */ - H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode property value */ + if ( aux_ptr->mpi_rank == 0 ) { - /* Get the dataset transfer property list */ - if ( NULL == (dxpl = H5I_object(dxpl_id)) ) { + write_permitted = aux_ptr->write_permitted; - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \ - "not a dataset creation property list") + } else { + + write_permitted = FALSE; + } + } +#endif /* H5_HAVE_PARALLEL */ + + *write_permitted_ptr = write_permitted; + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_check_if_write_permitted() */ + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_deleted_entry() + * + * Purpose: Log an entry for which H5C__DELETED_FLAG has been set. + * + * If mpi_rank is 0, we must make sure that the entry doesn't + * appear in the cleaned or dirty entry lists. Otherwise, + * we have nothing to do. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_deleted_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + unsigned int flags) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_deleted_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + + HDassert( (flags & H5C__DELETED_FLAG) != 0 ); + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + /* if the entry appears in the dirtied entry slist, remove it. */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); } - /* Get the transfer mode property */ - if( H5P_get(dxpl, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0 ) { + /* if the entry appears in the cleaned entry slist, remove it. */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, \ - "can't retrieve xfer mode") + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); } + } - if ( xfer_mode == H5FD_MPIO_INDEPENDENT ) { +done: + + FUNC_LEAVE_NOAPI(ret_value) - write_permitted = FALSE; +} /* H5AC_log_deleted_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_dirtied_entry() + * + * Purpose: Update the dirty_bytes count for a newly dirtied entry. + * + * If mpi_rank isnt 0, this simply means adding the size + * of the entries to the dirty_bytes count. + * + * If mpi_rank is 0, we must first check to see if the entry + * appears in the dirty entries slist. If it is, do nothing. + * If it isn't, add the size to th dirty_bytes count, add the + * entry to the dirty entries slist, and remove it from the + * cleaned list (if it is present there). + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_dirtied_entry(H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + haddr_t addr, + hbool_t size_changed, + size_t new_size) +{ + herr_t ret_value = SUCCEED; /* Return value */ + size_t entry_size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_dirtied_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + HDassert( entry_ptr->is_dirty == FALSE ); + + if ( size_changed ) { + + entry_size = new_size; + + } else { + + entry_size = entry_ptr->size; + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + aux_ptr->dirty_bytes += entry_size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) { + + /* the entry is dirty. If it exists on the cleaned entries list, + * remove it. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from clean entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } + } + } else { + + aux_ptr->dirty_bytes += entry_size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->unprotect_dirty_bytes += entry_size; + aux_ptr->unprotect_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_dirtied_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_flushed_entry() + * + * Purpose: Update the clean entry slist for the flush of an entry -- + * specifically, if the entry has been cleared, remove it + * from both the cleaned and dirtied lists if it is present. + * Otherwise, if the entry was dirty, insert the indicated + * entry address in the clean slist if it isn't there already. + * + * This function is only used in PHDF5, and should only + * be called for the process with mpi rank 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/29/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +#if 0 /* This is useful debugging code. -- JRM */ +static herr_t +H5AC_log_flushed_entry_dummy(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_flushed_entry_dummy, FAIL) + + aux_ptr = cache_ptr->aux_ptr; + + if ( ( was_dirty ) && ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) == 0 ) ) { + + HDfprintf(stdout, + "%d:H5AC_log_flushed_entry(): addr = %d, flags = %x, was_dirty = %d, type_id = %d\n", + (int)(aux_ptr->mpi_rank), (int)addr, flags, (int)was_dirty, type_id); + } +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_flushed_entry_dummy() */ +#endif /* JRM */ + +static herr_t +H5AC_log_flushed_entry(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + UNUSED int type_id) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t cleared; + H5AC_aux_t * aux_ptr; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + + FUNC_ENTER_NOAPI(H5AC_log_flushed_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank == 0 ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + cleared = ( (flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0 ); + + if ( cleared ) { + + /* If the entry has been cleared, must remove it from both the + * cleaned list and the dirtied list. + */ + + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from clean entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } + + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == H5AC__H5AC_SLIST_ENTRY_T_MAGIC); + HDassert( slist_entry_ptr->addr == addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); + } + } else if ( was_dirty ) { + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the clean entry list. */ + + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate clean slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->c_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into clean entry slist.") + } + + aux_ptr->c_slist_len += 1; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_flushed_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_inserted_entry() + * + * Purpose: Update the dirty_bytes count for a newly inserted entry. + * + * If mpi_rank isnt 0, this simply means adding the size + * of the entry to the dirty_bytes count. + * + * If mpi_rank is 0, we must also add the entry to the + * dirty entries slist. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_inserted_entry(H5F_t * f, + H5AC_t * cache_ptr, + H5AC_info_t * entry_ptr, + const H5AC_class_t * type, + haddr_t addr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + size_t size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_inserted_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + HDassert( entry_ptr != NULL ); + HDassert( entry_ptr->addr == addr ); + HDassert( entry_ptr->type == type ); + + /* the size field of the entry will not have been set yet, so we + * have to obtain it directly. + */ + if ( (type->size)(f, (void *)entry_ptr, &size) < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ + "Can't get size of entry to be inserted.") + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&addr)) == NULL ) { + + /* insert the address of the entry in the dirty entry list, and + * add its size to the dirty_bytes count. + */ + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = addr; + + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + + } else { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Inserted entry already in dirty slist.") + } + + if ( H5SL_search(aux_ptr->c_slist_ptr, (void *)(&addr)) != NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Inserted entry in clean slist.") + } + } + + aux_ptr->dirty_bytes += size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->insert_dirty_bytes += size; + aux_ptr->insert_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_inserted_entry() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_log_renamed_entry() + * + * Purpose: Update the dirty_bytes count for a renamed entry. + * + * WARNING + * + * At present, the way that the rename call is used ensures + * that the renamed entry is present in all caches by + * renaming in a collective operation and immediately after + * unprotecting the target entry. + * + * This function uses this invarient, and will cause arcane + * failures if it is not met. If maintaining this invarient + * becomes impossible, we will have to rework this function + * extensively, and likely include a bit of IPC for + * synchronization. A better option might be to subsume + * rename in the unprotect operation. + * + * Given that the target entry is in all caches, the function + * proceeds as follows: + * + * For processes with mpi rank other 0, it simply checks to + * see if the entry was dirty prior to the rename, and adds + * the entries size to the dirty bytes count. + * + * In the process with mpi rank 0, the function first checks + * to see if the entry was dirty prior to the rename. If it + * was, and if the entry doesn't appear in the dirtied list + * under its old address, it adds the entry's size to the + * dirty bytes count. + * + * The rank 0 process then removes any references to the + * entry under its old address from the cleands and dirtied + * lists, and inserts an entry in the dirtied list under the + * new address. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 6/30/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_log_renamed_entry(H5AC_t * cache_ptr, + haddr_t old_addr, + haddr_t new_addr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t entry_in_cache; + hbool_t entry_dirty; + size_t entry_size; + H5AC_aux_t * aux_ptr = NULL; + H5AC_slist_entry_t * slist_entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_log_renamed_entry, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + + /* get entry status, size, etc here */ + if ( H5C_get_entry_status(cache_ptr, old_addr, &entry_size, &entry_in_cache, + &entry_dirty, NULL) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't get entry status.") + + } else if ( ! entry_in_cache ) { + + HDassert( entry_in_cache ); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry not in cache.") + } + + if ( aux_ptr->mpi_rank == 0 ) { + + HDassert( aux_ptr->d_slist_ptr != NULL ); + HDassert( aux_ptr->c_slist_ptr != NULL ); + + /* if the entry appears in the cleaned entry slist, under its old + * address, remove it. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->c_slist_ptr, + (void *)(&old_addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == old_addr ); + + if ( H5SL_remove(aux_ptr->c_slist_ptr, (void *)(&old_addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from cleaned entry slist.") + } + + slist_entry_ptr->magic = 0; + H5FL_FREE(H5AC_slist_entry_t, slist_entry_ptr); + slist_entry_ptr = NULL; + + aux_ptr->c_slist_len -= 1; + + HDassert( aux_ptr->c_slist_len >= 0 ); + } + + /* if the entry appears in the dirtied entry slist under its old + * address, remove it, but don't free it. Set addr to new_addr. + */ + if ( (slist_entry_ptr = H5SL_search(aux_ptr->d_slist_ptr, + (void *)(&old_addr))) != NULL ) { + + HDassert( slist_entry_ptr->magic == + H5AC__H5AC_SLIST_ENTRY_T_MAGIC ); + HDassert( slist_entry_ptr->addr == old_addr ); + + if ( H5SL_remove(aux_ptr->d_slist_ptr, (void *)(&old_addr)) + != slist_entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTDELETE, FAIL, \ + "Can't delete entry from dirty entry slist.") + } + + slist_entry_ptr->addr = new_addr; + + aux_ptr->d_slist_len -= 1; + + HDassert( aux_ptr->d_slist_len >= 0 ); } else { + + /* otherwise, allocate a new entry that is ready + * for insertion, and increment dirty_bytes. + * + * Note that the fact that the entry wasn't in the dirtied + * list under its old address implies that it must have + * been clean to start with. + */ + + HDassert( !entry_dirty ); + + if ( NULL == (slist_entry_ptr = H5FL_CALLOC(H5AC_slist_entry_t)) ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "Can't allocate dirty slist entry .") + } + + slist_entry_ptr->magic = H5AC__H5AC_SLIST_ENTRY_T_MAGIC; + slist_entry_ptr->addr = new_addr; - HDassert(xfer_mode == H5FD_MPIO_COLLECTIVE ); + aux_ptr->dirty_bytes += entry_size; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->rename_dirty_bytes += entry_size; + aux_ptr->rename_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ } + + /* verify that there is no entry at new_addr in the dirty slist */ + if ( H5SL_search(aux_ptr->d_slist_ptr, (void *)(&new_addr)) != NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "dirty slist already contains entry at new_addr.") + } + + /* insert / reinsert the entry in the dirty slist */ + if ( H5SL_insert(aux_ptr->d_slist_ptr, slist_entry_ptr, + &(slist_entry_ptr->addr)) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTINSERT, FAIL, \ + "can't insert entry into dirty entry slist.") + } + + aux_ptr->d_slist_len += 1; + + } else if ( ! entry_dirty ) { + + aux_ptr->dirty_bytes += entry_size; + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->rename_dirty_bytes += entry_size; + aux_ptr->rename_dirty_bytes_updates += 1; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ } +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_log_renamed_entry() */ #endif /* H5_HAVE_PARALLEL */ - *write_permitted_ptr = write_permitted; + +/*------------------------------------------------------------------------- + * Function: H5AC_propagate_flushed_and_still_clean_entries_list + * + * Purpose: In PHDF5, only the metadata cache with mpi rank 0 is allowed + * to write to file. All other metadata caches on processes + * with rank greater than 0 must retain dirty entries until + * they are notified that the entry is now clean. + * + * This function is the main routine for that proceedure. + * It must be called simultaniously on all processes that + * have the relevant file open. To this end, there must + * be a barrier immediately prior to this call. + * + * Typicaly, this will be done one of two ways: + * + * 1) Dirty byte creation exceeds some user specified value. + * + * While metadata reads may occur independently, all + * operations writing metadata must be collective. Thus + * all metadata caches see the same sequence of operations, + * and therefore the same dirty data creation. + * + * This fact is used to synchronize the caches for purposes + * of propagating the list of flushed and still clean + * entries, by simply calling this function from all + * caches whenever some user specified threshold on dirty + * data is exceeded. + * + * 2) Under direct user control -- this operation must be + * collective. + * + * The operations to be managed by this function are as + * follows: + * + * For the process with mpi rank 0: + * + * 1) Enable writes, flush the cache to its min clean size, + * and then disable writes again. + * + * 2) Load the contents of the flushed and still clean entries + * list (c_slist_ptr) into a buffer, and broadcast that + * buffer to all the other caches. + * + * 3) Clear the flushed and still clean entries list + * (c_slist_ptr). + * + * + * For all processes with mpi rank greater than 0: + * + * 1) Receive the flushed and still clean entries list broadcast + * + * 2) Mark the specified entries as clean. + * + * + * For all processes: + * + * 1) Reset the dirtied bytes count to 0. + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: John Mainzer + * July 5, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +herr_t +H5AC_propagate_flushed_and_still_clean_entries_list(H5F_t * f, + hid_t dxpl_id, + H5AC_t * cache_ptr, + hbool_t do_barrier) +{ + herr_t ret_value = SUCCEED; /* Return value */ + herr_t result; + int mpi_code; + H5AC_aux_t * aux_ptr = NULL; + + FUNC_ENTER_NOAPI(H5AC_propagate_flushed_and_still_clean_entries_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + HDfprintf(stdout, + "%d:H5AC_propagate...:%d: (u/uu/i/iu/r/ru) = %d/%d/%d/%d/%d/%d\n", + (int)(aux_ptr->mpi_rank), + (int)(aux_ptr->dirty_bytes_propagations), + (int)(aux_ptr->unprotect_dirty_bytes), + (int)(aux_ptr->unprotect_dirty_bytes_updates), + (int)(aux_ptr->insert_dirty_bytes), + (int)(aux_ptr->insert_dirty_bytes_updates), + (int)(aux_ptr->rename_dirty_bytes), + (int)(aux_ptr->rename_dirty_bytes_updates)); +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + + if ( do_barrier ) { + + /* to prevent "messages from the future" we must synchronize all + * processes before we start the flush. This synchronization may + * already be done -- hence the do_barrier parameter. + */ + + if ( MPI_SUCCESS != (mpi_code = MPI_Barrier(aux_ptr->mpi_comm)) ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) + } + } + + if ( aux_ptr->mpi_rank == 0 ) { + + aux_ptr->write_permitted = TRUE; + + result = H5C_flush_to_min_clean(f, dxpl_id, H5AC_noblock_dxpl_id, + cache_ptr); + + aux_ptr->write_permitted = FALSE; + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "H5C_flush_to_min_clean() failed.") + } + + if ( H5AC_broadcast_clean_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't broadcast clean slist.") + } + + HDassert( aux_ptr->c_slist_len == 0 ); + + } else { + + if ( H5AC_receive_and_apply_clean_list(f, dxpl_id, + H5AC_noblock_dxpl_id, + cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't receive and/or process clean slist broadcast.") + } + } + + aux_ptr->dirty_bytes = 0; +#if H5AC_DEBUG_DIRTY_BYTES_CREATION + aux_ptr->dirty_bytes_propagations += 1; + aux_ptr->unprotect_dirty_bytes = 0; + aux_ptr->unprotect_dirty_bytes_updates = 0; + aux_ptr->insert_dirty_bytes = 0; + aux_ptr->insert_dirty_bytes_updates = 0; + aux_ptr->rename_dirty_bytes = 0; + aux_ptr->rename_dirty_bytes_updates = 0; +#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ done: FUNC_LEAVE_NOAPI(ret_value) -} /* H5AC_check_if_write_permitted() */ +} /* H5AC_propagate_flushed_and_still_clean_entries_list() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * + * Function: H5AC_receive_and_apply_clean_list() + * + * Purpose: Receive the list of cleaned entries from process 0, + * and mark the specified entries as clean. + * + * This function must only be called by the process with + * MPI_rank greater than 0. + * + * Return SUCCEED on success, and FAIL on failure. + * + * Return: Non-negative on success/Negative on failure. + * + * Programmer: John Mainzer, 7/4/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +static herr_t +H5AC_receive_and_apply_clean_list(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5AC_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5AC_aux_t * aux_ptr = NULL; + haddr_t * haddr_buf_ptr = NULL; + MPI_Offset * MPI_Offset_buf_ptr = NULL; + size_t buf_size; + int i = 0; + int mpi_result; + int num_entries; + + FUNC_ENTER_NOAPI(H5AC_receive_and_apply_clean_list, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + aux_ptr = cache_ptr->aux_ptr; + + HDassert( aux_ptr != NULL ); + HDassert( aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC ); + HDassert( aux_ptr->mpi_rank != 0 ); + + /* First receive the number of entries in the list so that we + * can set up a buffer to receive them. If there aren't + * any, we are done. + */ + mpi_result = MPI_Bcast(&num_entries, 1, MPI_INT, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 1", mpi_result) + } + + if ( num_entries > 0 ) + { + /* allocate a buffers to store the list of entry base addresses in */ + + buf_size = sizeof(MPI_Offset) * (size_t)num_entries; + + MPI_Offset_buf_ptr = (MPI_Offset *)H5MM_malloc(buf_size); + + if ( MPI_Offset_buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for receive buffer") + } + + haddr_buf_ptr = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * + (size_t)num_entries); + + if ( haddr_buf_ptr == NULL ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed for haddr buffer") + } + + + /* Now receive the list of cleaned entries + * + * The peculiar structure of the following call to MPI_Bcast is + * due to MPI's (?) failure to believe in the MPI_Offset type. + * Thus the element type is MPI_BYTE, with size equal to the + * buf_size computed above. + */ + + mpi_result = MPI_Bcast((void *)MPI_Offset_buf_ptr, (int)buf_size, + MPI_BYTE, 0, aux_ptr->mpi_comm); + + if ( mpi_result != MPI_SUCCESS ) { + + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed 2", mpi_result) + } + + + /* translate the MPI_Offsets to haddr_t */ + i = 0; + while ( i < num_entries ) + { + haddr_buf_ptr[i] = H5FD_mpi_MPIOff_to_haddr(MPI_Offset_buf_ptr[i]); + + if ( haddr_buf_ptr[i] == HADDR_UNDEF ) { + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, \ + "can't convert MPI off to haddr") + } + + i++; + } + + + /* mark the indicated entries as clean */ + if ( H5C_mark_entries_as_clean(f, primary_dxpl_id, secondary_dxpl_id, + cache_ptr, (int32_t)num_entries, + &(haddr_buf_ptr[0])) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't mark entries clean.") + + } + } + +done: + + if ( MPI_Offset_buf_ptr != NULL ) { + + MPI_Offset_buf_ptr = + (MPI_Offset *)H5MM_xfree((void *)MPI_Offset_buf_ptr); + } + + if ( haddr_buf_ptr != NULL ) { + + haddr_buf_ptr = (haddr_t *)H5MM_xfree((void *)haddr_buf_ptr); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5AC_receive_and_apply_clean_list() */ +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h index 450907f..3cbe62e 100644 --- a/src/H5ACprivate.h +++ b/src/H5ACprivate.h @@ -182,7 +182,7 @@ extern hid_t H5AC_ind_dxpl_id; /* int version = */ H5C__CURR_AUTO_SIZE_CTL_VER, \ /* hbool_t rpt_fcn_enabled = */ FALSE, \ /* hbool_t set_initial_size = */ TRUE, \ - /* size_t initial_size = */ (1 * 1024 * 1024), \ + /* size_t initial_size = */ ( 1 * 1024 * 1024), \ /* double min_clean_fraction = */ 0.25, \ /* size_t max_size = */ (16 * 1024 * 1024), \ /* size_t min_size = */ ( 1 * 1024 * 1024), \ @@ -216,6 +216,7 @@ extern hid_t H5AC_ind_dxpl_id; #define H5AC__SET_FLUSH_MARKER_FLAG H5C__SET_FLUSH_MARKER_FLAG #define H5AC__DELETED_FLAG H5C__DELETED_FLAG #define H5AC__DIRTIED_FLAG H5C__DIRTIED_FLAG +#define H5AC__SIZE_CHANGED_FLAG H5C__SIZE_CHANGED_FLAG #define H5AC__FLUSH_INVALIDATE_FLAG H5C__FLUSH_INVALIDATE_FLAG #define H5AC__FLUSH_CLEAR_ONLY_FLAG H5C__FLUSH_CLEAR_ONLY_FLAG #define H5AC__FLUSH_MARKED_ENTRIES_FLAG H5C__FLUSH_MARKED_ENTRIES_FLAG @@ -235,6 +236,7 @@ H5_DLL herr_t H5AC_unprotect(H5F_t *f, hid_t dxpl_id, H5_DLL herr_t H5AC_flush(H5F_t *f, hid_t dxpl_id, unsigned flags); H5_DLL herr_t H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t new_addr); + H5_DLL herr_t H5AC_dest(H5F_t *f, hid_t dxpl_id); H5_DLL herr_t H5AC_stats(const H5F_t *f); @@ -200,6 +200,12 @@ * caused compiler warnings. * JRM - 1/10/05 * + * - Added the H5C__DLL_UPDATE_FOR_SIZE_CHANGE macro and the associated + * sanity checking macros. These macro are used to update the size of + * a DLL when one of its entries changes size. + * + * JRM - 9/8/05 + * ****************************************************************************/ #if H5C_DO_SANITY_CHECKS @@ -267,11 +273,29 @@ if ( ( (entry_ptr) == NULL ) || \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \ } +#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ +if ( ( (dll_len) <= 0 ) || \ + ( (dll_size) <= 0 ) || \ + ( (old_size) <= 0 ) || \ + ( (old_size) > (dll_size) ) || \ + ( (new_size) <= 0 ) || \ + ( ( (dll_len) == 1 ) && ( (old_size) != (dll_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL pre size update SC failed") \ +} + +#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ +if ( ( (new_size) > (dll_size) ) || \ + ( ( (dll_len) == 1 ) && ( (new_size) != (dll_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "DLL post size update SC failed") \ +} + #else /* H5C_DO_SANITY_CHECKS */ #define H5C__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) #define H5C__DLL_SC(head_ptr, tail_ptr, len, Size, fv) #define H5C__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) +#define H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) +#define H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) #endif /* H5C_DO_SANITY_CHECKS */ @@ -344,6 +368,11 @@ if ( ( (entry_ptr) == NULL ) || \ (Size) -= entry_ptr->size; \ } +#define H5C__DLL_UPDATE_FOR_SIZE_CHANGE(dll_len, dll_size, old_size, new_size) \ + H5C__DLL_PRE_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) \ + (dll_size) -= (old_size); \ + (dll_size) += (new_size); \ + H5C__DLL_POST_SIZE_UPDATE_SC(dll_len, dll_size, old_size, new_size) #if H5C_DO_SANITY_CHECKS @@ -534,6 +563,19 @@ if ( ( (entry_ptr) == NULL ) || \ #define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr) \ (((cache_ptr)->renames)[(entry_ptr)->type->id])++; +#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size)\ + if ( (entry_ptr)->size < (new_size) ) { \ + ((cache_ptr)->size_increases[(entry_ptr)->type->id])++; \ + if ( (cache_ptr)->index_size > (cache_ptr)->max_index_size ) \ + (cache_ptr)->max_index_size = (cache_ptr)->index_size; \ + if ( (cache_ptr)->slist_size > (cache_ptr)->max_slist_size ) \ + (cache_ptr)->max_slist_size = (cache_ptr)->slist_size; \ + if ( (cache_ptr)->pl_size > (cache_ptr)->max_pl_size ) \ + (cache_ptr)->max_pl_size = (cache_ptr)->pl_size; \ + } else { \ + ((cache_ptr)->size_decreases[(entry_ptr)->type->id])++; \ + } + #define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr) \ (cache_ptr)->total_ht_insertions++; @@ -646,6 +688,7 @@ if ( ( (entry_ptr) == NULL ) || \ #define H5C__RESET_CACHE_ENTRY_STATS(entry_ptr) #define H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr) #define H5C__UPDATE_STATS_FOR_RENAME(cache_ptr, entry_ptr) +#define H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, entry_ptr, new_size) #define H5C__UPDATE_STATS_FOR_HT_INSERTION(cache_ptr) #define H5C__UPDATE_STATS_FOR_HT_DELETION(cache_ptr) #define H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, success, depth) @@ -748,9 +791,32 @@ if ( ( (cache_ptr) == NULL ) || \ ( ((cache_ptr)->index)[k] != (entry_ptr) ) || \ ( (entry_ptr)->ht_prev != NULL ) ) { \ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ - "Post HT shift to front SC failed") \ + "Post HT shift to front SC failed") \ +} + +#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->index_len <= 0 ) || \ + ( (cache_ptr)->index_size <= 0 ) || \ + ( (new_size) <= 0 ) || \ + ( (old_size) > (cache_ptr)->index_size ) || \ + ( (new_size) <= 0 ) || \ + ( ( (cache_ptr)->index_len == 1 ) && \ + ( (cache_ptr)->index_size != (old_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Pre HT entry size change SC failed") \ } +#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->index_len <= 0 ) || \ + ( (cache_ptr)->index_size <= 0 ) || \ + ( (new_size) > (cache_ptr)->index_size ) || \ + ( ( (cache_ptr)->index_len == 1 ) && \ + ( (cache_ptr)->index_size != (new_size) ) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Post HT entry size change SC failed") \ +} #else /* H5C_DO_SANITY_CHECKS */ @@ -759,6 +825,8 @@ if ( ( (cache_ptr) == NULL ) || \ #define H5C__PRE_HT_SEARCH_SC(cache_ptr, Addr, fail_val) #define H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, Addr, k, fail_val) #define H5C__POST_HT_SHIFT_TO_FRONT(cache_ptr, entry_ptr, k, fail_val) +#define H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) +#define H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) #endif /* H5C_DO_SANITY_CHECKS */ @@ -840,6 +908,14 @@ if ( ( (cache_ptr) == NULL ) || \ H5C__UPDATE_STATS_FOR_HT_SEARCH(cache_ptr, (entry_ptr != NULL), depth) \ } +#define H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \ +{ \ + H5C__PRE_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ + (cache_ptr)->index_size -= old_size; \ + (cache_ptr)->index_size += new_size; \ + H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size) \ +} + /************************************************************************** * @@ -896,7 +972,7 @@ if ( ( (cache_ptr) == NULL ) || \ HDassert( H5F_addr_defined((entry_ptr)->addr) ); \ HDassert( !((entry_ptr)->in_slist) ); \ \ - if ( H5SL_insert((cache_ptr)->slist_ptr, &(entry_ptr)->addr, entry_ptr) \ + if ( H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) \ < 0 ) \ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, \ "Can't insert entry in skip list") \ @@ -963,6 +1039,44 @@ if ( ( (cache_ptr) == NULL ) || \ } /* H5C__REMOVE_ENTRY_FROM_SLIST */ +/*------------------------------------------------------------------------- + * + * Function: H5C__UPDATE_SLIST_FOR_SIZE_CHANGE + * + * Purpose: Update cache_ptr->slist_size for a change in the size of + * and entry in the slist. + * + * Return: N/A + * + * Programmer: John Mainzer, 9/07/05 + * + * Modifications: + * + * None. + * + *------------------------------------------------------------------------- + */ + +#define H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, old_size, new_size) \ +{ \ + HDassert( (cache_ptr) ); \ + HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \ + HDassert( (old_size) > 0 ); \ + HDassert( (new_size) > 0 ); \ + HDassert( (old_size) <= (cache_ptr)->slist_size ); \ + HDassert( (cache_ptr)->slist_len > 0 ); \ + HDassert( ((cache_ptr)->slist_len > 1) || \ + ( (cache_ptr)->slist_size == (old_size) ) ); \ + \ + (cache_ptr)->slist_size -= (old_size); \ + (cache_ptr)->slist_size += (new_size); \ + \ + HDassert( (new_size) <= (cache_ptr)->slist_size ); \ + HDassert( ( (cache_ptr)->slist_len > 1 ) || \ + ( (cache_ptr)->slist_size == (new_size) ) ); \ +} /* H5C__REMOVE_ENTRY_FROM_SLIST */ + + /************************************************************************** * * Replacement policy update macros: @@ -1732,6 +1846,11 @@ static herr_t H5C_make_space_in_cache(H5F_t * f, size_t space_needed, hbool_t write_permitted, hbool_t * first_flush_ptr); +#if H5C_DO_EXTREME_SANITY_CHECKS +static herr_t H5C_validate_lru_list(H5C_t * cache_ptr); +static herr_t H5C_verify_not_in_index(H5C_t * cache_ptr, + H5C_cache_entry_t * entry_ptr); +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ /**************************************************************************** @@ -1874,8 +1993,8 @@ done: * * The check_write_permitted parameter must either be NULL, * or point to a function of type H5C_write_permitted_func_t. - * If it is NULL, the cache will presume that writes are - * always permitted. + * If it is NULL, the cache will use the write_permitted + * flag to determine whether writes are permitted. * * Return: Success: Pointer to the new instance. * @@ -1901,6 +2020,16 @@ done: * Added/updated initialization for the automatic cache * size control data structures. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of + * the H5C_t structure. + * + * JRM -- 7/5/05 + * Added the new log_flush parameter and supporting code. + * + * JRM -- 9/21/05 + * Added the new aux_ptr parameter and supporting code. + * *------------------------------------------------------------------------- */ @@ -1909,7 +2038,10 @@ H5C_create(size_t max_cache_size, size_t min_clean_size, int max_type_id, const char * (* type_name_table_ptr), - H5C_write_permitted_func_t check_write_permitted) + H5C_write_permitted_func_t check_write_permitted, + hbool_t write_permitted, + H5C_log_flush_func_t log_flush, + void * aux_ptr) { int i; H5C_t * cache_ptr = NULL; @@ -1925,6 +2057,8 @@ H5C_create(size_t max_cache_size, HDassert( max_type_id < H5C__MAX_NUM_TYPE_IDS ); HDassert( type_name_table_ptr ); + HDassert( ( write_permitted == TRUE ) || ( write_permitted == FALSE ) ); + for ( i = 0; i <= max_type_id; i++ ) { HDassert( (type_name_table_ptr)[i] ); @@ -1950,6 +2084,8 @@ H5C_create(size_t max_cache_size, cache_ptr->magic = H5C__H5C_T_MAGIC; + cache_ptr->aux_ptr = aux_ptr; + cache_ptr->max_type_id = max_type_id; cache_ptr->type_name_table_ptr = type_name_table_ptr; @@ -1957,6 +2093,9 @@ H5C_create(size_t max_cache_size, cache_ptr->min_clean_size = min_clean_size; cache_ptr->check_write_permitted = check_write_permitted; + cache_ptr->write_permitted = write_permitted; + + cache_ptr->log_flush = log_flush; cache_ptr->index_len = 0; cache_ptr->index_size = (size_t)0; @@ -2426,11 +2565,13 @@ H5C_flush_cache(H5F_t * f, hbool_t first_flush = TRUE; int32_t protected_entries = 0; int32_t i; - H5SL_node_t * node_ptr; - H5C_cache_entry_t * entry_ptr; + H5SL_node_t * node_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; #if H5C_DO_SANITY_CHECKS int32_t actual_slist_len = 0; + int32_t initial_slist_len = 0; size_t actual_slist_size = 0; + size_t initial_slist_size = 0; #endif /* H5C_DO_SANITY_CHECKS */ FUNC_ENTER_NOAPI(H5C_flush_cache, FAIL) @@ -2468,12 +2609,28 @@ H5C_flush_cache(H5F_t * f, } else { node_ptr = H5SL_first(cache_ptr->slist_ptr); + +#if H5C_DO_SANITY_CHECKS + /* H5C_flush_single_entry() now removes dirty entries from the + * slist as it flushes them. Thus for sanity checks we must + * make note of the initial slist length and size before we + * do any flushes. + */ + initial_slist_len = cache_ptr->slist_len; + initial_slist_size = cache_ptr->slist_size; +#endif /* H5C_DO_SANITY_CHECKS */ + } while ( node_ptr != NULL ) { entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + /* increment node pointer now, before we delete its target + * from the slist. + */ + node_ptr = H5SL_next(node_ptr); + HDassert( entry_ptr != NULL ); HDassert( entry_ptr->in_slist ); @@ -2512,14 +2669,22 @@ H5C_flush_cache(H5F_t * f, } } } - - node_ptr = H5SL_next(node_ptr); - } /* while */ #if H5C_DO_SANITY_CHECKS - HDassert( actual_slist_len == cache_ptr->slist_len ); - HDassert( actual_slist_size == cache_ptr->slist_size ); + HDassert( actual_slist_len == initial_slist_len ); + HDassert( actual_slist_size == initial_slist_size ); + + if ( (flags & H5C__FLUSH_INVALIDATE_FLAG) != 0 ) { + + HDassert( cache_ptr->slist_len == initial_slist_len ); + HDassert( cache_ptr->slist_size == initial_slist_size ); + + } else if ( ! flush_marked_entries ) { + + HDassert( cache_ptr->slist_len == 0 ); + HDassert( cache_ptr->slist_size == 0 ); + } #endif /* H5C_DO_SANITY_CHECKS */ if ( destroy ) { @@ -2625,6 +2790,95 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_flush_to_min_clean + * + * Purpose: Flush dirty entries until the caches min clean size is + * attained. + * + * This function is used in the implementation of the + * metadata cache in PHDF5. To avoid "messages from the + * future", the cache on process 0 can't be allowed to + * flush entries until the other processes have reached + * the same point in the calculation. If this constraint + * is not met, it is possible that the other processes will + * read metadata generated at a future point in the + * computation. + * + * + * Return: Non-negative on success/Negative on failure or if + * write is not permitted. + * + * Programmer: John Mainzer + * 9/16/05 + * + * Modifications: + * + * None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_flush_to_min_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr) +{ + herr_t result; + herr_t ret_value = SUCCEED; + hbool_t first_flush = TRUE; + hbool_t write_permitted; + + FUNC_ENTER_NOAPI(H5C_flush_to_min_clean, FAIL) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->skip_file_checks || f ); + + if ( cache_ptr->check_write_permitted != NULL ) { + + result = (cache_ptr->check_write_permitted)(f, + primary_dxpl_id, + &write_permitted); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Can't get write_permitted") + } + } else { + + write_permitted = cache_ptr->write_permitted; + } + + if ( ! write_permitted ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "cache write is not permitted!?!\n"); + } + + + result = H5C_make_space_in_cache(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + 0, + write_permitted, + &first_flush); + + if ( result < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "H5C_make_space_in_cache failed.") + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_flush_to_min_clean() */ + + +/*------------------------------------------------------------------------- * Function: H5C_get_cache_auto_resize_config * * Purpose: Copy the current configuration of the cache automatic @@ -2798,6 +3052,93 @@ done: /*------------------------------------------------------------------------- + * + * Function: H5C_get_entry_status + * + * Purpose: This function is used to determine whether the cache + * contains an entry with the specified base address. If + * the entry exists, it also reports some status information + * on the entry. + * + * Status information is reported in the locations pointed + * to by the size_ptr, in_cache_ptr, is_dirty_ptr, and + * is_protected_ptr. While in_cache_ptr must be defined, + * the remaining pointers may be NULL, in which case the + * associated data is not reported. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 7/1/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +herr_t +H5C_get_entry_status(H5C_t * cache_ptr, + haddr_t addr, + size_t * size_ptr, + hbool_t * in_cache_ptr, + hbool_t * is_dirty_ptr, + hbool_t * is_protected_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5C_get_entry_status, FAIL) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( H5F_addr_defined(addr) ); + HDassert( in_cache_ptr != NULL ); + + /* this test duplicates tow of the above asserts, but we need an + * invocation of HGOTO_ERROR to keep the compiler happy. + */ + if ( ( cache_ptr == NULL ) || ( cache_ptr->magic != H5C__H5C_T_MAGIC ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Bad cache_ptr on entry.") + } + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + + if ( entry_ptr == NULL ) { + + /* the entry doesn't exist in the cache -- report this + * and quit. + */ + *in_cache_ptr = FALSE; + + } else { + + *in_cache_ptr = TRUE; + + if ( size_ptr != NULL ) { + + *size_ptr = entry_ptr->size; + } + + if ( is_dirty_ptr != NULL ) { + + *is_dirty_ptr = entry_ptr->is_dirty; + } + + if ( is_protected_ptr != NULL ) { + + *is_protected_ptr = entry_ptr->is_protected; + } + } + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_get_entry_status() */ + + +/*------------------------------------------------------------------------- * Function: H5C_insert_entry * * Purpose: Adds the specified thing to the cache. The thing need not @@ -2845,6 +3186,10 @@ done: * This is part of a set of changes moving management of the * is_dirty field of H5C_cache_entry_t into the H5C code. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of + * the H5C_t structure. + * *------------------------------------------------------------------------- */ @@ -2877,6 +3222,21 @@ H5C_insert_entry(H5F_t * f, HDassert( H5F_addr_defined(addr) ); HDassert( thing ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_verify_not_in_index(cache_ptr, (H5C_cache_entry_t *)thing) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "thing already in index.\n"); + } +#endif /* H5C_DO_SANITY_CHECKS */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); entry_ptr = (H5C_cache_entry_t *)thing; @@ -2897,6 +3257,10 @@ H5C_insert_entry(H5F_t * f, entry_ptr->in_slist = FALSE; +#ifdef H5_HAVE_PARALLEL + entry_ptr->clear_on_unprotect = FALSE; +#endif /* H5_HAVE_PARALLEL */ + entry_ptr->ht_next = NULL; entry_ptr->ht_prev = NULL; @@ -2925,6 +3289,9 @@ H5C_insert_entry(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, \ "Can't get write_permitted") } + } else { + + write_permitted = cache_ptr->write_permitted; } HDassert( entry_ptr->size <= H5C_MAX_ENTRY_SIZE ); @@ -3022,10 +3389,26 @@ H5C_insert_entry(H5F_t * f, H5C__UPDATE_RP_FOR_INSERTION(cache_ptr, entry_ptr, FAIL) +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr) done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_insert_entry() */ @@ -3033,6 +3416,171 @@ done: /*------------------------------------------------------------------------- * + * Function: H5C_mark_entries_as_clean + * + * Purpose: When the H5C code is used to implement the metadata caches + * in PHDF5, only the cache with MPI_rank 0 is allowed to + * actually write entries to disk -- all other caches must + * retain dirty entries until they are advised that the + * entries are clean. + * + * This function exists to allow the H5C code to receive these + * notifications. + * + * The function receives a list of entry base addresses + * which must refer to dirty entries in the cache. If any + * of the entries are either clean or don't exist, the + * function flags an error. + * + * The function scans the list of entries and flushes all + * those that are currently unprotected with the + * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently + * protected are flagged for clearing when they are + * unprotected. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 7/5/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#ifdef H5_HAVE_PARALLEL +herr_t +H5C_mark_entries_as_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int32_t ce_array_len, + haddr_t * ce_array_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + hbool_t first_flush = TRUE; + int i; + haddr_t addr; +#if H5C_DO_SANITY_CHECKS + haddr_t last_addr; +#endif /* H5C_DO_SANITY_CHECKS */ + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI(H5C_mark_entries_as_clean, FAIL) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->skip_file_checks || f ); + + HDassert( ce_array_len > 0 ); + HDassert( ce_array_ptr != NULL ); + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + for ( i = 0; i < ce_array_len; i++ ) + { + addr = ce_array_ptr[i]; + +#if H5C_DO_SANITY_CHECKS + if ( i == 0 ) { + + last_addr = addr; + + } else { + + if ( last_addr == addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Duplicate entry in cleaned list.\n"); + + } else if ( last_addr > addr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "cleaned list not sorted.\n"); + } + } + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ +#endif /* H5C_DO_SANITY_CHECKS */ + + HDassert( H5F_addr_defined(addr) ); + + H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + + if ( entry_ptr == NULL ) { +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n", + (int)i, + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not in cache?!?!?.") + + } else if ( ! entry_ptr->is_dirty ) { + +#if H5C_DO_SANITY_CHECKS + HDfprintf(stdout, + "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n", + (long)addr); +#endif /* H5C_DO_SANITY_CHECKS */ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Listed entry not dirty?!?!?.") + + } else if ( entry_ptr->is_protected ) { + + entry_ptr->clear_on_unprotect = TRUE; + + } else { + + if ( H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + entry_ptr->type, + addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &first_flush, + TRUE) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") + } + } + } + +done: + +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_mark_entries_as_clean() */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- + * * Function: H5C_rename_entry * * Purpose: Use this function to notify the cache that an entry's @@ -3077,6 +3625,15 @@ H5C_rename_entry(H5C_t * cache_ptr, HDassert( H5F_addr_defined(new_addr) ); HDassert( H5F_addr_ne(old_addr, new_addr) ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + H5C__SEARCH_INDEX(cache_ptr, old_addr, entry_ptr, FAIL) if ( ( entry_ptr == NULL ) || ( entry_ptr->type != type ) ) @@ -3146,6 +3703,14 @@ H5C_rename_entry(H5C_t * cache_ptr, done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_rename_entry() */ @@ -3204,6 +3769,9 @@ done: * forces an immediate reduction in cache size. Modified * the code to deal with this eventuallity. * + * JRM -- 6/24/05 + * Added support for the new write_permitted field of H5C_t. + * *------------------------------------------------------------------------- */ @@ -3237,6 +3805,15 @@ H5C_protect(H5F_t * f, HDassert( type->load ); HDassert( H5F_addr_defined(addr) ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + /* first check to see if the target is in cache */ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL) @@ -3284,6 +3861,8 @@ H5C_protect(H5F_t * f, } } else { + write_permitted = cache_ptr->write_permitted; + have_write_permitted = TRUE; } @@ -3390,6 +3969,8 @@ H5C_protect(H5F_t * f, } } else { + write_permitted = cache_ptr->write_permitted; + have_write_permitted = TRUE; } } @@ -3439,6 +4020,15 @@ H5C_protect(H5F_t * f, done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HDassert(0); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_protect() */ @@ -3796,6 +4386,10 @@ done: * JRM -- 7/21/04 * Updated function for the addition of the hash table. * + * JRM -- 9/8/05 + * Updated function for the addition of cache entry size + * change statistics. + * *------------------------------------------------------------------------- */ @@ -3819,6 +4413,8 @@ H5C_stats(H5C_t * cache_ptr, int64_t total_flushes = 0; int64_t total_evictions = 0; int64_t total_renames = 0; + int64_t total_size_increases = 0; + int64_t total_size_decreases = 0; int32_t aggregate_max_accesses = 0; int32_t aggregate_min_accesses = 1000000; int32_t aggregate_max_clears = 0; @@ -3845,13 +4441,15 @@ H5C_stats(H5C_t * cache_ptr, for ( i = 0; i <= cache_ptr->max_type_id; i++ ) { - total_hits += cache_ptr->hits[i]; - total_misses += cache_ptr->misses[i]; - total_insertions += cache_ptr->insertions[i]; - total_clears += cache_ptr->clears[i]; - total_flushes += cache_ptr->flushes[i]; - total_evictions += cache_ptr->evictions[i]; - total_renames += cache_ptr->renames[i]; + total_hits += cache_ptr->hits[i]; + total_misses += cache_ptr->misses[i]; + total_insertions += cache_ptr->insertions[i]; + total_clears += cache_ptr->clears[i]; + total_flushes += cache_ptr->flushes[i]; + total_evictions += cache_ptr->evictions[i]; + total_renames += cache_ptr->renames[i]; + total_size_increases += cache_ptr->size_increases[i]; + total_size_decreases += cache_ptr->size_decreases[i]; #if H5C_COLLECT_CACHE_ENTRY_STATS if ( aggregate_max_accesses < cache_ptr->max_accesses[i] ) aggregate_max_accesses = cache_ptr->max_accesses[i]; @@ -3963,6 +4561,10 @@ H5C_stats(H5C_t * cache_ptr, (long)total_insertions, (long)total_renames); + HDfprintf(stdout, " Total entry size incrs / decrs = %ld / %ld\n", + (long)total_size_increases, + (long)total_size_decreases); + #if H5C_COLLECT_CACHE_ENTRY_STATS HDfprintf(stdout, " aggregate max / min accesses = %d / %d\n", @@ -4014,6 +4616,11 @@ H5C_stats(H5C_t * cache_ptr, (long)(cache_ptr->insertions[i]), (long)(cache_ptr->renames[i])); + HDfprintf(stdout, + " size increases / decreases = %ld / %ld\n", + (long)(cache_ptr->size_increases[i]), + (long)(cache_ptr->size_decreases[i])); + #if H5C_COLLECT_CACHE_ENTRY_STATS HDfprintf(stdout, @@ -4061,6 +4668,9 @@ done: * JRM - 7/21/04 * Updated for hash table related statistics. * + * JRM - 9/8/05 + * Updated for size increase / decrease statistics. + * *------------------------------------------------------------------------- */ @@ -4084,6 +4694,8 @@ H5C_stats__reset(H5C_t * cache_ptr) cache_ptr->flushes[i] = 0; cache_ptr->evictions[i] = 0; cache_ptr->renames[i] = 0; + cache_ptr->size_increases[i] = 0; + cache_ptr->size_decreases[i] = 0; } cache_ptr->total_ht_insertions = 0; @@ -4176,6 +4788,27 @@ H5C_stats__reset(H5C_t * cache_ptr) * field into the cache code. This has become necessary * to repair a cache coherency bug in PHDF5. * + * JRM -- 7/5/05 + * Added code supporting the new clear_on_unprotect field + * of H5C_cache_entry_t. This change is also part of the + * above mentioned cache coherency bug fix in PHDF5. + * + * JRM -- 9/8/05 + * Added the size_changed and new_size parameters and the + * supporting code. Since the metadata cache synchronizes + * on dirty bytes creation in the PHDF5 case, we must now + * track changes in entry size. + * + * Note that the new_size parameter is ignored unless the + * size_changed parameter is TRUE. In this case, the new_size + * must be positive. + * + * Also observe that if size_changed is TRUE, dirtied must be + * TRUE. + * + * JRM -- 9/23/05 + * Moved the size_changed parameter into flags. + * *------------------------------------------------------------------------- */ herr_t @@ -4186,16 +4819,27 @@ H5C_unprotect(H5F_t * f, const H5C_class_t * type, haddr_t addr, void * thing, - unsigned int flags) + unsigned int flags, + size_t new_size) { hbool_t deleted; + hbool_t dirtied; hbool_t set_flush_marker; + hbool_t size_changed; +#ifdef H5_HAVE_PARALLEL + hbool_t clear_entry = FALSE; +#endif /* H5_HAVE_PARALLEL */ herr_t ret_value = SUCCEED; /* Return value */ H5C_cache_entry_t * entry_ptr; H5C_cache_entry_t * test_entry_ptr; FUNC_ENTER_NOAPI(H5C_unprotect, FAIL) + deleted = ( (flags & H5C__DELETED_FLAG) != 0 ); + dirtied = ( (flags & H5C__DIRTIED_FLAG) != 0 ); + set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); + size_changed = ( (flags & H5C__SIZE_CHANGED_FLAG) != 0 ); + HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); HDassert( cache_ptr->skip_file_checks || f ); @@ -4204,15 +4848,52 @@ H5C_unprotect(H5F_t * f, HDassert( type->flush ); HDassert( H5F_addr_defined(addr) ); HDassert( thing ); - - deleted = ( (flags & H5C__DELETED_FLAG) != 0 ); - set_flush_marker = ( (flags & H5C__SET_FLUSH_MARKER_FLAG) != 0 ); + HDassert( ( size_changed == TRUE ) || ( size_changed == FALSE ) ); + HDassert( ( ! size_changed ) || ( dirtied ) ); + HDassert( ( ! size_changed ) || ( new_size > 0 ) ); entry_ptr = (H5C_cache_entry_t *)thing; HDassert( entry_ptr->addr == addr ); HDassert( entry_ptr->type == type ); +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + +#ifdef H5_HAVE_PARALLEL + /* When the H5C code is used to implement the metadata cache in the + * PHDF5 case, only the cache on process 0 is allowed to write to file. + * All the other metadata caches must hold dirty entries until they + * are told that the entries are clean. + * + * The clear_on_unprotect flag in the H5C_cache_entry_t structure + * exists to deal with the case in which an entry is protected when + * its cache receives word that the entry is now clean. In this case, + * the clear_on_unprotect flag is set, and the entry is flushed with + * the H5C__FLUSH_CLEAR_ONLY_FLAG. + * + * All this is a bit awkward, but until the metadata cache entries + * are contiguous, with only one dirty flag, we have to let the supplied + * functions deal with the reseting the is_dirty flag. + */ + if ( entry_ptr->clear_on_unprotect ) { + + HDassert( entry_ptr->is_dirty ); + + entry_ptr->clear_on_unprotect = FALSE; + + if ( ! dirtied ) { + + clear_entry = TRUE; + } + } +#endif /* H5_HAVE_PARALLEL */ + if ( ! (entry_ptr->is_protected) ) { HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ @@ -4220,13 +4901,40 @@ H5C_unprotect(H5F_t * f, } /* mark the entry as dirty if appropriate */ - entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || (flags & H5AC__DIRTIED_FLAG) ); + entry_ptr->is_dirty = ( (entry_ptr->is_dirty) || dirtied ); + + /* update for change in entry size if necessary */ + if ( ( size_changed ) && ( entry_ptr->size != new_size ) ) { + + /* update the protected list */ + H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr->pl_len), \ + (cache_ptr->pl_size), \ + (entry_ptr->size), (new_size)); + + /* update the hash table */ + H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\ + (new_size)); + + /* if the entry is in the skip list, update that too */ + if ( entry_ptr->in_slist ) { + + H5C__UPDATE_SLIST_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\ + (new_size)); + } + + /* update statistics just before changing the entry size */ + H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE((cache_ptr), (entry_ptr), \ + (new_size)); + + /* finally, update the entry size proper */ + entry_ptr->size = new_size; + } H5C__UPDATE_RP_FOR_UNPROTECT(cache_ptr, entry_ptr, FAIL) entry_ptr->is_protected = FALSE; - /* if the entry is dirty, or its flush_marker with the set flush flag, + /* if the entry is dirty, 'or' its flush_marker with the set flush flag, * and then add it to the skip list if it isn't there already. */ @@ -4287,11 +4995,57 @@ H5C_unprotect(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush.") } } +#ifdef H5_HAVE_PARALLEL + else if ( clear_entry ) { + + /* the following first flush flag will never be used as we are + * calling H5C_flush_single_entry with the H5C__FLUSH_CLEAR_ONLY_FLAG + * flag. However, it is needed for the function call. + */ + hbool_t dummy_first_flush = TRUE; + + /* verify that the target entry is in the cache. */ + + H5C__SEARCH_INDEX(cache_ptr, addr, test_entry_ptr, FAIL) + + if ( test_entry_ptr == NULL ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "entry not in hash table?!?.") + } + else if ( test_entry_ptr != entry_ptr ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ + "hash table contains multiple entries for addr?!?.") + } + + if ( H5C_flush_single_entry(f, + primary_dxpl_id, + secondary_dxpl_id, + cache_ptr, + type, + addr, + H5C__FLUSH_CLEAR_ONLY_FLAG, + &dummy_first_flush, + TRUE) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear.") + } + } +#endif /* H5_HAVE_PARALLEL */ H5C__UPDATE_STATS_FOR_UNPROTECT(cache_ptr) done: +#if H5C_DO_EXTREME_SANITY_CHECKS + if ( H5C_validate_lru_list(cache_ptr) < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "LRU sanity check failed.\n"); + } +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_unprotect() */ @@ -5656,6 +6410,18 @@ done: * H5C__FLUSH_INVALIDATE_FLAG and H5C__FLUSH_CLEAR_ONLY_FLAG * respectively. * + * JRM -- 6/24/05 + * Added code to remove dirty entries from the slist after + * they have been flushed. Also added a sanity check that + * will scream if we attempt a write when writes are + * completely disabled. + * + * JRM -- 7/5/05 + * Added code to call the new log_flush callback whenever + * a dirty entry is written to disk. Note that the callback + * is not called if the H5C__FLUSH_CLEAR_ONLY_FLAG is set, + * as there is no write to file in this case. + * *------------------------------------------------------------------------- */ static herr_t @@ -5671,8 +6437,10 @@ H5C_flush_single_entry(H5F_t * f, { hbool_t destroy; hbool_t clear_only; + hbool_t was_dirty; herr_t ret_value = SUCCEED; /* Return value */ herr_t status; + int type_id; H5C_cache_entry_t * entry_ptr = NULL; FUNC_ENTER_NOAPI_NOINIT(H5C_flush_single_entry) @@ -5779,6 +6547,9 @@ H5C_flush_single_entry(H5F_t * f, #endif /* NDEBUG */ #endif /* H5_HAVE_PARALLEL */ + was_dirty = entry_ptr->is_dirty; + type_id = entry_ptr->type->id; + entry_ptr->flush_marker = FALSE; if ( clear_only ) { @@ -5928,6 +6699,16 @@ H5C_flush_single_entry(H5F_t * f, } } else { +#if H5C_DO_SANITY_CHECKS + if ( ( entry_ptr->is_dirty ) && + ( cache_ptr->check_write_permitted == NULL ) && + ( ! (cache_ptr->write_permitted) ) ) { + + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Write when writes are always forbidden!?!?!") + } +#endif /* H5C_DO_SANITY_CHECKS */ + /* Only block for all the processes on the first piece of metadata */ @@ -5951,10 +6732,28 @@ H5C_flush_single_entry(H5F_t * f, } } + if ( ( ! destroy ) && ( entry_ptr->in_slist ) ) { + + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) + } + if ( ! destroy ) { HDassert( !(entry_ptr->is_dirty) ); HDassert( !(entry_ptr->flush_marker) ); + HDassert( !(entry_ptr->in_slist) ); + } + + if ( cache_ptr->log_flush ) { + + status = (cache_ptr->log_flush)(cache_ptr, addr, was_dirty, + flags, type_id); + + if ( status < 0 ) { + + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "log_flush callback failed.") + } } } @@ -6024,6 +6823,10 @@ H5C_load_entry(H5F_t * f, entry_ptr->type = type; entry_ptr->is_protected = FALSE; entry_ptr->in_slist = FALSE; + entry_ptr->flush_marker = FALSE; +#ifdef H5_HAVE_PARALLEL + entry_ptr->clear_on_unprotect = FALSE; +#endif /* H5_HAVE_PARALLEL */ if ( (type->size)(f, thing, &(entry_ptr->size)) < 0 ) { @@ -6271,3 +7074,203 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5C_make_space_in_cache() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_validate_lru_list + * + * Purpose: Debugging function that scans the LRU list for errors. + * + * If an error is detected, the function generates a + * diagnostic and returns FAIL. If no error is detected, + * the function returns SUCCEED. + * + * Return: FAIL if error is detected, SUCCEED otherwise. + * + * Programmer: John Mainzer, 7/14/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + +static herr_t +H5C_validate_lru_list(H5C_t * cache_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int32_t len = 0; + size_t size = 0; + H5C_cache_entry_t * entry_ptr = NULL; + + FUNC_ENTER_NOAPI_NOINIT(H5C_validate_lru_list) + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + + if ( ( ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_tail_ptr == NULL ) + ) + && + ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 1 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 1 failed") + } + + if ( ( cache_ptr->LRU_list_len < 0 ) || ( cache_ptr->LRU_list_size < 0 ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 2 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 2 failed") + } + + if ( ( cache_ptr->LRU_list_len == 1 ) + && + ( ( cache_ptr->LRU_head_ptr != cache_ptr->LRU_tail_ptr ) + || + ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_head_ptr->size != cache_ptr->LRU_list_size ) + ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 3 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 3 failed") + } + + if ( ( cache_ptr->LRU_list_len >= 1 ) + && + ( ( cache_ptr->LRU_head_ptr == NULL ) + || + ( cache_ptr->LRU_head_ptr->prev != NULL ) + || + ( cache_ptr->LRU_tail_ptr == NULL ) + || + ( cache_ptr->LRU_tail_ptr->next != NULL ) + ) + ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 4 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 4 failed") + } + + entry_ptr = cache_ptr->LRU_head_ptr; + while ( entry_ptr != NULL ) + { + + if ( ( entry_ptr != cache_ptr->LRU_head_ptr ) && + ( ( entry_ptr->prev == NULL ) || + ( entry_ptr->prev->next != entry_ptr ) ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 5 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 5 failed") + } + + if ( ( entry_ptr != cache_ptr->LRU_tail_ptr ) && + ( ( entry_ptr->next == NULL ) || + ( entry_ptr->next->prev != entry_ptr ) ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 6 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 6 failed") + } + + len++; + size += entry_ptr->size; + entry_ptr = entry_ptr->next; + } + + if ( ( cache_ptr->LRU_list_len != len ) || + ( cache_ptr->LRU_list_size != size ) ) { + + HDfprintf(stdout,"H5C_validate_lru_list: Check 7 failed.\n"); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Check 7 failed") + } + +done: + + if ( ret_value != SUCCEED ) { + + HDassert(0); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_validate_lru_list() */ + +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_verify_not_in_index + * + * Purpose: Debugging function that scans the hash table to verify + * that the specified instance of H5C_cache_entry_t is not + * present. + * + * If an error is detected, the function generates a + * diagnostic and returns FAIL. If no error is detected, + * the function returns SUCCEED. + * + * Return: FAIL if error is detected, SUCCEED otherwise. + * + * Programmer: John Mainzer, 7/14/05 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +#if H5C_DO_EXTREME_SANITY_CHECKS + +static herr_t +H5C_verify_not_in_index(H5C_t * cache_ptr, + H5C_cache_entry_t * entry_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int32_t i; + int32_t depth; + H5C_cache_entry_t * scan_ptr = NULL; + + FUNC_ENTER_NOAPI_NOINIT(H5C_verify_not_in_index) + + HDassert( cache_ptr != NULL ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( entry_ptr != NULL ); + + for ( i = 0; i < H5C__HASH_TABLE_LEN; i++ ) + { + depth = 0; + scan_ptr = cache_ptr->index[i]; + + while ( scan_ptr != NULL ) + { + if ( scan_ptr == entry_ptr ) { + + HDfprintf(stdout, + "H5C_verify_not_in_index: entry in index (%d/%d)\n", + i, depth); + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "Entry already in index.") + } + depth++; + scan_ptr = scan_ptr->ht_next; + } + } + +done: + + if ( ret_value != SUCCEED ) { + + HDassert(0); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_verify_not_in_index() */ + +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 54a8f48..007f30f 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -79,9 +79,18 @@ * * JRM - 7/19/04 * + * The TBBT has since been replaced with a skip list. This change + * greatly predates this note. + * + * JRM - 9/26/05 + * * magic: Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC. This * field is used to validate pointers to instances of H5C_t. * + * aux_ptr: Pointer to void used to allow wrapper code to associate + * its data with an instance of H5C_t. The H5C cache code + * sets this field to NULL, and otherwise leaves it alone. + * * max_type_id: Integer field containing the maximum type id number assigned * to a type of entry in the cache. All type ids from 0 to * max_type_id inclusive must be defined. The names of the @@ -110,6 +119,10 @@ * will attempt to reduce its size to the max_cache_size * limit on the next cache write. * + * c) When an entry increases in size, the cache may exceed + * the max_cache_size limit until the next time the cache + * attempts to load or insert an entry. + * * min_clean_size: Nominal minimum number of clean bytes in the cache. * The cache attempts to maintain this number of bytes of * clean data so as to avoid case b) above. Again, this is @@ -126,7 +139,14 @@ * a write is permissible at any given point in time. * * If no such function is specified (i.e. this field is NULL), - * the cache will presume that writes are always permissable. + * the cache uses the following write_permitted field to + * determine whether writes are permitted. + * + * write_permitted: If check_write_permitted is NULL, this boolean flag + * indicates whether writes are permitted. + * + * log_flush: If provided, this function is called whenever a dirty + * entry is flushed to disk. * * * The cache requires an index to facilitate searching for entries. The @@ -483,6 +503,16 @@ * id equal to the array index has been renamed in the current * epoch. * + * size_increases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. + * The cells are used to record the number of times an entry + * with type id equal to the array index has increased in + * size in the current epoch. + * + * size_decreases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. + * The cells are used to record the number of times an entry + * with type id equal to the array index has decreased in + * size in the current epoch. + * * total_ht_insertions: Number of times entries have been inserted into the * hash table in the current epoch. * @@ -580,6 +610,8 @@ struct H5C_t { uint32_t magic; + void * aux_ptr; + int32_t max_type_id; const char * (* type_name_table_ptr); @@ -587,6 +619,9 @@ struct H5C_t size_t min_clean_size; H5C_write_permitted_func_t check_write_permitted; + hbool_t write_permitted; + + H5C_log_flush_func_t log_flush; int32_t index_len; size_t index_size; @@ -646,6 +681,8 @@ struct H5C_t int64_t flushes[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t evictions[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t renames[H5C__MAX_NUM_TYPE_IDS + 1]; + int64_t size_increases[H5C__MAX_NUM_TYPE_IDS + 1]; + int64_t size_decreases[H5C__MAX_NUM_TYPE_IDS + 1]; int64_t total_ht_insertions; int64_t total_ht_deletions; diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index 7c0151b..57d74af 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -36,6 +36,7 @@ #include "H5Fprivate.h" /* File access */ #define H5C_DO_SANITY_CHECKS 0 +#define H5C_DO_EXTREME_SANITY_CHECKS 0 /* This sanity checking constant was picked out of the air. Increase * or decrease it if appropriate. Its purposes is to detect corrupt @@ -149,6 +150,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, hid_t dxpl_id, hbool_t * write_permitted_ptr); +typedef herr_t (*H5C_log_flush_func_t)(H5C_t * cache_ptr, + haddr_t addr, + hbool_t was_dirty, + unsigned flags, + int type_id); + /* Upper and lower limits on cache size. These limits are picked * out of a hat -- you should be able to change them as necessary. * @@ -180,8 +187,9 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * * In typical application, this structure is the first field in a * structure to be cached. For historical reasons, the external module - * is responsible for managing the is_dirty field. All other fields are - * managed by the cache. + * is responsible for managing the is_dirty field (this is no longer + * completely true. See the comment on the is_dirty field for details). + * All other fields are managed by the cache. * * The fields of this structure are discussed individually below: * @@ -220,6 +228,12 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * someday. However it will require a change in the * cache interface. * + * Update: Management of the is_dirty field has been largely + * moved into the cache. The only remaining exceptions + * are the flush and clear functions supplied by the + * modules using the cache. These still clear the + * is_dirty field as before. -- JRM 7/5/05 + * * is_protected: Boolean flag indicating whether this entry is protected * (or locked, to use more conventional terms). When it is * protected, the entry cannot be flushed or accessed until @@ -239,6 +253,18 @@ typedef herr_t (*H5C_write_permitted_func_t)(const H5F_t *f, * H5AC__FLUSH_MARKED_ENTRIES_FLAG. The flag is reset when * the entry is flushed for whatever reason. * + * clear_on_unprotect: Boolean flag used only in PHDF5. When H5C is used + * to implement the metadata cache In the parallel case, only + * the cache with mpi rank 0 is allowed to actually write to + * file -- all other caches must retain dirty entries until they + * are advised that the entry is clean. + * + * This flag is used in the case that such an advisory is + * received when the entry is protected. If it is set when an + * entry is unprotected, and the dirtied flag is not set in + * the unprotect, the entry's is_dirty flag is reset by flushing + * it with the H5C__FLUSH_CLEAR_ONLY_FLAG. + * * * Fields supporting the hash table: * @@ -344,6 +370,9 @@ typedef struct H5C_cache_entry_t hbool_t is_protected; hbool_t in_slist; hbool_t flush_marker; +#ifdef H5_HAVE_PARALLEL + hbool_t clear_on_unprotect; +#endif /* H5_HAVE_PARALLEL */ /* fields supporting the hash table: */ @@ -676,20 +705,24 @@ typedef struct H5C_auto_size_ctl_t #define H5C__SET_FLUSH_MARKER_FLAG 0x0001 #define H5C__DELETED_FLAG 0x0002 -/* This flag applies only to H5C_unprotect() */ +/* These flags applies only to H5C_unprotect() */ #define H5C__DIRTIED_FLAG 0x0004 +#define H5C__SIZE_CHANGED_FLAG 0x0008 /* These flags apply to H5C_flush() & H5C_flush_single_entry() */ -#define H5C__FLUSH_INVALIDATE_FLAG 0x0008 -#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0010 -#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0020 +#define H5C__FLUSH_INVALIDATE_FLAG 0x0010 +#define H5C__FLUSH_CLEAR_ONLY_FLAG 0x0020 +#define H5C__FLUSH_MARKED_ENTRIES_FLAG 0x0040 H5_DLL H5C_t * H5C_create(size_t max_cache_size, size_t min_clean_size, int max_type_id, - const char * (* type_name_table_ptr), - H5C_write_permitted_func_t check_write_permitted); + const char * (* type_name_table_ptr), + H5C_write_permitted_func_t check_write_permitted, + hbool_t write_permitted, + H5C_log_flush_func_t log_flush, + void * aux_ptr); H5_DLL void H5C_def_auto_resize_rpt_fcn(H5C_t * cache_ptr, int32_t version, @@ -713,6 +746,11 @@ H5_DLL herr_t H5C_flush_cache(H5F_t * f, H5C_t * cache_ptr, unsigned flags); +H5_DLL herr_t H5C_flush_to_min_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr); + H5_DLL herr_t H5C_get_cache_auto_resize_config(H5C_t * cache_ptr, H5C_auto_size_ctl_t *config_ptr); @@ -725,6 +763,13 @@ H5_DLL herr_t H5C_get_cache_size(H5C_t * cache_ptr, H5_DLL herr_t H5C_get_cache_hit_rate(H5C_t * cache_ptr, double * hit_rate_ptr); +H5_DLL herr_t H5C_get_entry_status(H5C_t * cache_ptr, + haddr_t addr, + size_t * size_ptr, + hbool_t * in_cache_ptr, + hbool_t * is_dirty_ptr, + hbool_t * is_protected_ptr); + H5_DLL herr_t H5C_insert_entry(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, @@ -734,6 +779,13 @@ H5_DLL herr_t H5C_insert_entry(H5F_t * f, void * thing, unsigned int flags); +H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t * f, + hid_t primary_dxpl_id, + hid_t secondary_dxpl_id, + H5C_t * cache_ptr, + int32_t ce_array_len, + haddr_t * ce_array_ptr); + H5_DLL herr_t H5C_rename_entry(H5C_t * cache_ptr, const H5C_class_t * type, haddr_t old_addr, @@ -770,7 +822,8 @@ H5_DLL herr_t H5C_unprotect(H5F_t * f, const H5C_class_t * type, haddr_t addr, void * thing, - unsigned flags); + unsigned int flags, + size_t new_size); H5_DLL herr_t H5C_validate_resize_config(H5C_auto_size_ctl_t * config_ptr, unsigned int tests); @@ -2034,7 +2034,7 @@ H5D_update_entry_info(H5F_t *file, hid_t dxpl_id, H5D_t *dset, H5P_genplist_t *p #endif /* H5O_ENABLE_BOGUS */ /* Add a modification time message. */ - if (H5O_touch_oh(file, oh, TRUE, &oh_flags) < 0) + if (H5O_touch_oh(file, dxpl_id, oh, TRUE, &oh_flags) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to update modification time message") done: @@ -67,7 +67,8 @@ static int H5F_flush_all_cb(void *f, hid_t fid, void *_invalidate); static unsigned H5F_get_objects(const H5F_t *f, unsigned types, int max_objs, hid_t *obj_id_list); static int H5F_get_objects_cb(void *obj_ptr, hid_t obj_id, void *key); static herr_t H5F_get_vfd_handle(const H5F_t *file, hid_t fapl, void** file_handle); -static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id); +static H5F_t *H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id, + H5FD_t *lf); static herr_t H5F_dest(H5F_t *f, hid_t dxpl_id); static herr_t H5F_flush(H5F_t *f, hid_t dxpl_id, H5F_scope_t scope, unsigned flags); static herr_t H5F_close(H5F_t *f); @@ -1426,10 +1427,16 @@ done: * Updated for the new metadata cache, and associated * property list changes. * + * J Mainzer, Jun 30, 2005 + * Added lf parameter so the shared->lf field can be + * initialized prior to the call to H5AC_create() if a + * new instance of H5F_file_t is created. lf should be + * NULL if shared isn't, and vise versa. + * *------------------------------------------------------------------------- */ static H5F_t * -H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id) +H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf) { H5F_t *f=NULL, *ret_value; H5P_genplist_t *plist; /* Property list */ @@ -1441,14 +1448,17 @@ H5F_new(H5F_file_t *shared, hid_t fcpl_id, hid_t fapl_id) f->file_id = -1; if (shared) { + HDassert( lf == NULL ); f->shared = shared; } else { + HDassert( lf != NULL ); f->shared = H5FL_CALLOC(H5F_file_t); f->shared->super_addr = HADDR_UNDEF; f->shared->base_addr = HADDR_UNDEF; f->shared->freespace_addr = HADDR_UNDEF; f->shared->driver_addr = HADDR_UNDEF; - + f->shared->lf = lf; + /* * Copy the file creation and file access property lists into the * new file handle. We do this early because some values might need @@ -1803,10 +1813,10 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "file is already open for read-only") /* Allocate new "high-level" file struct */ - if ((file = H5F_new(shared, fcpl_id, fapl_id)) == NULL) + if ((file = H5F_new(shared, fcpl_id, fapl_id, NULL)) == NULL) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") - lf = file->shared->lf; + lf = file->shared->lf; } else if (flags!=tent_flags) { /* * This file is not yet open by the library and the flags we used to @@ -1821,20 +1831,18 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id, hid_t d file = NULL; /*to prevent destruction of wrong file*/ HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file") } - if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id))) + if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf))) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") file->shared->flags = flags; - file->shared->lf = lf; } else { /* * This file is not yet open by the library and our tentative opening * above is good enough. */ - if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id))) + if (NULL==(file = H5F_new(NULL, fcpl_id, fapl_id, lf))) HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to create new file object") file->shared->flags = flags; - file->shared->lf = lf; } /* Short cuts */ @@ -2841,7 +2849,7 @@ H5Freopen(hid_t file_id) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file") /* Get a new "top level" file struct, sharing the same "low level" file struct */ - if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT))) + if (NULL==(new_file=H5F_new(old_file->shared, H5P_FILE_CREATE_DEFAULT, H5P_FILE_ACCESS_DEFAULT, NULL))) HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "unable to reopen file") /* Keep old file's read/write intent in new file */ @@ -3389,6 +3397,40 @@ H5F_mpi_get_comm(const H5F_t *f) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_mpi_get_comm() */ + + +/*------------------------------------------------------------------------- + * Function: H5F_mpi_get_size + * + * Purpose: Retrieves the size of an MPI process. + * + * Return: Success: The size (positive) + * + * Failure: Negative + * + * Programmer: John Mainzer + * Friday, May 6, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +H5F_mpi_get_size(const H5F_t *f) +{ + int ret_value; + + FUNC_ENTER_NOAPI(H5F_mpi_get_size, FAIL) + + assert(f && f->shared); + + /* Dispatch to driver */ + if ((ret_value=H5FD_mpi_get_size(f->shared->lf))<0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_size request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_mpi_get_size() */ #endif /* H5_HAVE_PARALLEL */ @@ -2328,7 +2328,7 @@ H5FD_free(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si H5_ASSIGN_OVERFLOW(overlap_size,(file->accum_loc+file->accum_size)-addr,haddr_t,size_t); /* Block to free is in the middle of the accumulator */ - if(H5F_addr_lt(addr,file->accum_loc+file->accum_size)) { + if(H5F_addr_lt((addr + size), file->accum_loc + file->accum_size)) { haddr_t tail_addr; size_t tail_size; diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index f285f1a..3cf1968 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -934,6 +934,13 @@ done: * * Modifications: * + * John Mainzer -- 9/21/05 + * Modified code to turn off the + * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag. + * With the movement of + * all cache writes to process 0, this flag has become + * problematic in PHDF5. + * *------------------------------------------------------------------------- */ static herr_t @@ -947,15 +954,6 @@ H5FD_mpio_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */) if(flags) { *flags=0; *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ - - /* Distinguish between updating the metadata accumulator on writes and - * reads. This is particularly (perhaps only, even) important for MPI-I/O - * where we guarantee that writes are collective, but reads may not be. - * If we were to allow the metadata accumulator to be written during a - * read operation, the application would hang. - */ - *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */ - *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ } /* end if */ @@ -1553,9 +1551,18 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property") +#if 0 /* JRM */ + /* The metadata cache now only writes from process 0, which makes + * this synchronization incorrect. I'm leaving this code commented + * out instead of deleting it to remind us that we should re-write + * this function so that a metadata write from any other process + * should flag an error. + * -- JRM 9/1/05 + */ if(block_before_meta_write) if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) +#endif /* JRM */ /* Only one process will do the actual write if all procs in comm write same metadata */ if (file->mpi_rank != H5_PAR_META_WRITE) { @@ -1616,11 +1623,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, file->eof = HADDR_UNDEF; done: - /* if only one process writes, need to broadcast the ret_value to other processes */ + +#if 0 /* JRM */ + /* Since metadata writes are now done by process 0 only, this broadcast + * is no longer needed. I leave it in and commented out to remind us + * that we need to re-work this function to reflect this reallity. + * + * -- JRM 9/1/05 + */ + /* if only one process writes, need to broadcast the ret_value to + * other processes + */ if (type!=H5FD_MEM_DRAW) { if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ +#endif /* JRM */ #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 11e7849..de491f0 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -910,6 +910,12 @@ done: * * Modifications: * + * John Mainzer -- 9/21/05 + * Modified code to turn off the + * H5FD_FEAT_ACCUMULATE_METADATA_WRITE flag. + * With the movement of all cache writes to process 0, + * this flag has become problematic in PHDF5. + * *------------------------------------------------------------------------- */ static herr_t @@ -923,15 +929,6 @@ H5FD_mpiposix_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */) if(flags) { *flags=0; *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ - - /* Distinguish between updating the metadata accumulator on writes and - * reads. This is particularly (perhaps only, even) important for MPI-I/O - * where we guarantee that writes are collective, but reads may not be. - * If we were to allow the metadata accumulator to be written during a - * read operation, the application would hang. - */ - *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */ - *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ } /* end if */ @@ -1235,6 +1232,14 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") /* Metadata specific actions */ + /* All metadata is now written from process 0 -- thus this function + * needs to be re-written to reflect this. For now I have simply + * commented out the code that attempts to synchronize metadata + * writes between processes, but we should really just flag an error + * whenever any process other than process 0 attempts to write + * metadata. + * -- JRM 9/1/05 + */ if(type!=H5FD_MEM_DRAW) { unsigned block_before_meta_write=0; /* Whether to block before a metadata write */ @@ -1252,9 +1257,11 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property") +#if 0 /* JRM */ if(block_before_meta_write) if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) +#endif /* JRM */ /* Only one process will do the actual write if all procs in comm write same metadata */ if (file->mpi_rank != H5_PAR_META_WRITE) @@ -1328,6 +1335,14 @@ done: file->pos = HADDR_UNDEF; file->op = OP_UNKNOWN; } /* end if */ +#if 0 /* JRM */ + /* Since metadata writes are now done by process 0 only, this broadcast + * is no longer needed. I leave it in and commented out to remind us + * that we need to re-work this function to reflect this reallity. + * + * -- JRM 9/1/05 + */ + /* Guard against getting into metadata broadcast in failure cases */ else { /* when only one process writes, need to broadcast the ret_value to other processes */ @@ -1336,6 +1351,7 @@ done: HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ } /* end else */ +#endif /* JRM */ FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_mpiposix_write() */ diff --git a/src/H5FDmulti.c b/src/H5FDmulti.c index 71b5c91..a1a065e 100644 --- a/src/H5FDmulti.c +++ b/src/H5FDmulti.c @@ -1628,7 +1628,14 @@ H5FD_multi_alloc(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, hsize_t size) if (HADDR_UNDEF==(addr=H5FDalloc(file->memb[mmt], type, dxpl_id, size))) H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, "member file can't alloc", HADDR_UNDEF) addr += file->fa.memb_addr[mmt]; - if (addr+size>file->eoa) file->eoa = addr+size; + if ( addr + size > file->eoa ) { + + if ( H5FD_multi_set_eoa(_file, addr + size) < 0 ) { + + H5Epush_ret(func, H5E_ERR_CLS, H5E_INTERNAL, H5E_BADVALUE, \ + "can't set eoa", HADDR_UNDEF) + } + } return addr; } diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index b9d9d74..53d3f05 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -448,6 +448,7 @@ H5_DLL haddr_t H5F_get_eoa(const H5F_t *f); #ifdef H5_HAVE_PARALLEL H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); +H5_DLL int H5F_mpi_get_size(const H5F_t *f); #endif /* H5_HAVE_PARALLEL */ /* Functions than check file mounting information */ @@ -155,7 +155,6 @@ H5HL_create(H5F_t *f, hid_t dxpl_id, size_t size_hint, haddr_t *addr_p/*out*/) heap->addr = *addr_p + (hsize_t)sizeof_hdr; heap->disk_alloc = size_hint; heap->mem_alloc = size_hint; - heap->disk_resrv = 0; if (NULL==(heap->chunk = H5FL_BLK_CALLOC(heap_chunk,(sizeof_hdr + size_hint)))) HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed"); @@ -320,6 +319,20 @@ done: * * Modifications: * + * John Mainzer, 8/10/05 + * Reworked this function for a different role. + * + * It used to be called during cache eviction, where it + * attempted to size the disk space allocation for the + * actuall size of the heap. However, this causes problems + * in the parallel case, as the reuslting disk allocations + * may not be synchronized. + * + * It is now called from H5HL_remove(), where it is used to + * reduce heap size in response to an entry deletion. This + * means that the function should either do nothing, or + * reduce the size of the disk allocation. + * *------------------------------------------------------------------------- */ static herr_t @@ -331,19 +344,13 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) FUNC_ENTER_NOAPI(H5HL_minimize_heap_space, FAIL) /* check args */ - assert(f); - assert(heap); + HDassert( f ); + HDassert( heap ); + HDassert( heap->disk_alloc == heap->mem_alloc ); sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */ /* - * When the heap is being flushed to disk, release the file space reserved - * for it. - */ - H5MF_free_reserved(f, (hsize_t)heap->disk_resrv); - heap->disk_resrv = 0; - - /* * Check to see if we can reduce the size of the heap in memory by * eliminating free blocks at the tail of the buffer before flushing the * buffer out. @@ -427,13 +434,15 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) } /* - * If the heap grew larger or smaller than disk storage then move the + * If the heap grew smaller than disk storage then move the * data segment of the heap to another contiguous block of disk * storage. */ if (heap->mem_alloc != heap->disk_alloc) { haddr_t old_addr = heap->addr, new_addr; + HDassert( heap->mem_alloc < heap->disk_alloc ); + /* Release old space on disk */ H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t); H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr, (hsize_t)heap->disk_alloc); @@ -453,7 +462,8 @@ H5HL_minimize_heap_space(H5F_t *f, hid_t dxpl_id, H5HL_t *heap) done: FUNC_LEAVE_NOAPI(ret_value) -} + +} /* H5HL_minimize_heap_space() */ /*------------------------------------------------------------------------- @@ -543,6 +553,13 @@ H5HL_serialize(H5F_t *f, H5HL_t *heap, uint8_t *buf) * * Bill Wendling, 2003-09-16 * Separated out the bit that serializes the heap. + * + * John Mainzer, 2005-08-10 + * Removed call to H5HL_minimize_heap_space(). It does disk space + * allocation, which can cause problems if done at flush time. + * Instead, disk space allocation/deallocation is now done at + * insert/remove time. + * *------------------------------------------------------------------------- */ static herr_t @@ -553,21 +570,18 @@ H5HL_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5HL_t *heap) FUNC_ENTER_NOAPI(H5HL_flush, FAIL); /* check arguments */ - assert(f); - assert(H5F_addr_defined(addr)); - assert(heap); + HDassert( f ); + HDassert( H5F_addr_defined(addr) ); + HDassert( heap ); + HDassert( heap->disk_alloc == heap->mem_alloc ); if (heap->cache_info.is_dirty) { haddr_t hdr_end_addr; size_t sizeof_hdr = H5HL_SIZEOF_HDR(f); /* cache H5HL header size for file */ - /* Minimize the heap space size if possible */ - if (H5HL_minimize_heap_space(f, dxpl_id, heap) < 0) - HGOTO_ERROR(H5E_HEAP, H5E_CANTFREE, FAIL, "unable to minimize local heap space") - /* Write the header */ if (H5HL_serialize(f, heap, heap->chunk) < 0) - HGOTO_ERROR(H5E_BTREE, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap") + HGOTO_ERROR(H5E_HEAP, H5E_CANTSERIALIZE, FAIL, "unable to serialize local heap") /* Copy buffer to disk */ hdr_end_addr = addr + (hsize_t)sizeof_hdr; @@ -951,6 +965,10 @@ H5HL_remove_free(H5HL_t *heap, H5HL_free_t *fl) * H5AC_unprotect() instead of manipulating the is_dirty * field of the cache info directly. * + * John Mainzer, 8/10/05 + * Modified code to allocate file space as needed, instead + * of allocating it on eviction. + * *------------------------------------------------------------------------- */ size_t @@ -959,10 +977,12 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void * H5HL_t *heap = NULL; unsigned heap_flags = H5AC__NO_FLAGS_SET; H5HL_free_t *fl = NULL, *max_fl = NULL; + htri_t tri_result; + herr_t result; size_t offset = 0; size_t need_size, old_size, need_more; + size_t new_disk_alloc; hbool_t found; - size_t disk_resrv; /* Amount of additional space to reserve in file */ size_t sizeof_hdr; /* Cache H5HL header size for file */ size_t ret_value; /* Return value */ @@ -1028,18 +1048,54 @@ H5HL_insert(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t buf_size, const void * if (found==FALSE) { need_more = MAX3(need_size, heap->mem_alloc, H5HL_SIZEOF_FREE(f)); - /* Reserve space in the file to hold the increased heap size - */ - if( heap->disk_resrv == heap->mem_alloc) - disk_resrv = need_more; - else - disk_resrv = heap->mem_alloc + need_more - heap->disk_resrv; + new_disk_alloc = heap->disk_alloc + need_more; + HDassert( heap->disk_alloc < new_disk_alloc ); + H5_CHECK_OVERFLOW(heap->disk_alloc, size_t, hsize_t); + H5_CHECK_OVERFLOW(new_disk_alloc, size_t, hsize_t); + + /* extend the current heap if we can... */ + tri_result = H5MF_can_extend(f, H5FD_MEM_LHEAP, heap->addr, + (hsize_t)(heap->disk_alloc), + (hsize_t)need_more); + if ( tri_result == TRUE ) { + + result = H5MF_extend(f, H5FD_MEM_LHEAP, heap->addr, + (hsize_t)(heap->disk_alloc), + (hsize_t)need_more); + if ( result < 0 ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \ + "can't extend heap on disk"); + } + + heap->disk_alloc = new_disk_alloc; + + } else { /* ...if we can't, allocate a new chunk & release the old */ + + haddr_t old_addr = heap->addr; + haddr_t new_addr; + + /* The new allocation may fail -- to avoid the possiblity of + * file corruption, allocate the new heap first, and then + * deallocate the old. + */ + + /* allocate new disk space for the heap */ + if ( (new_addr = H5MF_alloc(f, H5FD_MEM_LHEAP, dxpl_id, + (hsize_t)new_disk_alloc)) == HADDR_UNDEF ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), \ + "unable to allocate file space for heap") + } - if( H5MF_reserve(f, (hsize_t)disk_resrv) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, (size_t)(-1), "unable to reserve space in file"); + /* Release old space on disk */ + H5MF_xfree(f, H5FD_MEM_LHEAP, dxpl_id, old_addr, + (hsize_t)heap->disk_alloc); + H5E_clear_stack(NULL); /* don't really care if the free failed */ - /* Update heap's record of how much space it has reserved */ - heap->disk_resrv += disk_resrv; + heap->addr = new_addr; + heap->disk_alloc = new_disk_alloc; + } if (max_fl && max_fl->offset + max_fl->size == heap->mem_alloc) { /* @@ -1117,7 +1173,8 @@ done: HDONE_ERROR(H5E_HEAP, H5E_PROTECT, (size_t)(-1), "unable to release object header"); FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5HL_insert() */ #ifdef NOT_YET @@ -1217,6 +1274,11 @@ done: * H5AC_unprotect() instead of manipulating the is_dirty * field of the cache info directly. * + * John Mainzer, 8/10/05 + * Modified code to attempt to decrease heap size if the + * entry removal results in a free list entry at the end + * of the heap that is at least half the size of the heap. + * *------------------------------------------------------------------------- */ herr_t @@ -1225,15 +1287,15 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) H5HL_t *heap = NULL; unsigned heap_flags = H5AC__NO_FLAGS_SET; H5HL_free_t *fl = NULL, *fl2 = NULL; - herr_t ret_value=SUCCEED; /* Return value */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5HL_remove, FAIL); /* check arguments */ - assert(f); - assert(H5F_addr_defined(addr)); - assert(size > 0); - assert (offset==H5HL_ALIGN (offset)); + HDassert( f ); + HDassert( H5F_addr_defined(addr) ); + HDassert( size > 0 ); + HDassert( offset == H5HL_ALIGN(offset) ); if (0==(f->intent & H5F_ACC_RDWR)) HGOTO_ERROR (H5E_HEAP, H5E_WRITEERROR, FAIL, "no write intent on file"); @@ -1243,8 +1305,9 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) if (NULL == (heap = H5AC_protect(f, dxpl_id, H5AC_LHEAP, addr, NULL, NULL, H5AC_WRITE))) HGOTO_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to load heap"); - assert(offset < heap->mem_alloc); - assert(offset + size <= heap->mem_alloc); + HDassert( offset < heap->mem_alloc ); + HDassert( offset + size <= heap->mem_alloc ); + HDassert( heap->disk_alloc == heap->mem_alloc ); fl = heap->freelist; heap_flags |= H5AC__DIRTIED_FLAG; @@ -1268,10 +1331,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) assert (fl->offset==H5HL_ALIGN (fl->offset)); assert (fl->size==H5HL_ALIGN (fl->size)); fl2 = H5HL_remove_free(heap, fl2); + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != + SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl2 = fl2->next; } + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } else if (fl->offset + fl->size == offset) { @@ -1283,10 +1365,29 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) fl->size += fl2->size; assert (fl->size==H5HL_ALIGN (fl->size)); fl2 = H5HL_remove_free(heap, fl2); + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != + SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl2 = fl2->next; } + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } HGOTO_DONE(SUCCEED); } fl = fl->next; @@ -1321,6 +1422,16 @@ H5HL_remove(H5F_t *f, hid_t dxpl_id, haddr_t addr, size_t offset, size_t size) heap->freelist->prev = fl; heap->freelist = fl; + if ( ( (fl->offset + fl->size) == heap->mem_alloc ) && + ( (2 * fl->size) > heap->mem_alloc ) ) { + + if ( H5HL_minimize_heap_space(f, dxpl_id, heap) != SUCCEED ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, \ + "heap size minimization failed"); + } + } + done: if (heap && H5AC_unprotect(f, dxpl_id, H5AC_LHEAP, addr, heap, heap_flags) != SUCCEED) HDONE_ERROR(H5E_HEAP, H5E_PROTECT, FAIL, "unable to release object header"); diff --git a/src/H5HLpkg.h b/src/H5HLpkg.h index 1a61866..8b099cc 100644 --- a/src/H5HLpkg.h +++ b/src/H5HLpkg.h @@ -67,7 +67,6 @@ struct H5HL_t { haddr_t addr; /*address of data */ size_t disk_alloc; /*data bytes allocated on disk */ size_t mem_alloc; /*data bytes allocated in mem */ - size_t disk_resrv; /*data bytes "reserved" on disk */ uint8_t *chunk; /*the chunk, including header */ H5HL_free_t *freelist; /*the free list */ }; @@ -399,9 +399,9 @@ done: * * Purpose: Extend a block in the file. * - * Return: Success: TRUE(1)/FALSE(0) + * Return: Success: Non-negative * - * Failure: FAIL + * Failure: Negative * * Programmer: Quincey Koziol * Saturday, June 12, 2004 @@ -410,10 +410,10 @@ done: * *------------------------------------------------------------------------- */ -htri_t +herr_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested) { - htri_t ret_value; /* Return value */ + herr_t ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5MF_extend, FAIL); diff --git a/src/H5MFprivate.h b/src/H5MFprivate.h index ee35c2a..01b8204 100644 --- a/src/H5MFprivate.h +++ b/src/H5MFprivate.h @@ -53,7 +53,7 @@ H5_DLL herr_t H5MF_free_reserved(H5F_t *f, hsize_t size); H5_DLL hbool_t H5MF_alloc_overflow(H5F_t *f, hsize_t size); H5_DLL htri_t H5MF_can_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested); -H5_DLL htri_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, +H5_DLL herr_t H5MF_extend(H5F_t *f, H5FD_mem_t type, haddr_t addr, hsize_t size, hsize_t extra_requested); #endif @@ -89,10 +89,12 @@ static int H5O_append_real(H5F_t *f, hid_t dxpl_id, H5O_t *oh, unsigned * oh_flags_ptr); static herr_t H5O_remove_real(H5G_entry_t *ent, const H5O_class_t *type, int sequence, H5O_operator_t op, void *op_data, hbool_t adj_link, hid_t dxpl_id); -static unsigned H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, - size_t size, unsigned * oh_flags_ptr); -static unsigned H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size); -static unsigned H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size); +static unsigned H5O_alloc(H5F_t *f, hid_t dxpl_id, H5O_t *oh, + const H5O_class_t *type, size_t size, hbool_t * oh_dirtied_ptr); +static htri_t H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, + unsigned chunkno, size_t size, unsigned * msg_idx); +static unsigned H5O_alloc_new_chunk(H5F_t *f, hid_t dxpl_id, H5O_t *oh, + size_t size); static herr_t H5O_delete_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh); static herr_t H5O_delete_mesg(H5F_t *f, hid_t dxpl_id, H5O_mesg_t *mesg, hbool_t adj_link); @@ -582,8 +584,12 @@ H5O_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void UNUSED * _udata1, p += 3; /*reserved*/ /* Try to detect invalidly formatted object header messages */ - if (p + mesg_size > oh->chunk[chunkno].image + chunk_size) - HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, "corrupt object header"); + if (p + mesg_size > oh->chunk[chunkno].image + chunk_size) { + + HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, NULL, \ + "corrupt object header"); + } + /* Skip header messages we don't know about */ /* (Usually from future versions of the library */ @@ -740,17 +746,12 @@ H5O_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5O_t *oh) /* allocate file space for chunks that have none yet */ if (H5O_CONT_ID == curr_msg->type->id && !H5F_addr_defined(((H5O_cont_t *)(curr_msg->native))->addr)) { - cont = (H5O_cont_t *) (curr_msg->native); - assert(cont->chunkno < oh->nchunks); - assert(!H5F_addr_defined(oh->chunk[cont->chunkno].addr)); - cont->size = oh->chunk[cont->chunkno].size; - - /* Free the space we'd reserved in the file to hold this chunk */ - H5MF_free_reserved(f, (hsize_t)cont->size); - - if (HADDR_UNDEF==(cont->addr=H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)cont->size))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate space for object header data"); - oh->chunk[cont->chunkno].addr = cont->addr; + /* We now allocate disk space on insertion, instead + * of on flush from the cache, so this case is now an + * error. -- JRM + */ + HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, FAIL, + "File space for message not allocated!?!"); } /* @@ -1972,7 +1973,7 @@ H5O_modify_real(H5G_entry_t *ent, const H5O_class_t *type, int overwrite, /* Update the modification time message if any */ if(update_flags&H5O_UPDATE_TIME) - H5O_touch_oh(ent->file, oh, FALSE, &oh_flags); + H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags); /* Set return value */ ret_value = sequence; @@ -2258,7 +2259,8 @@ H5O_new_mesg(H5F_t *f, H5O_t *oh, unsigned *flags, const H5O_class_t *orig_type, HGOTO_ERROR (H5E_OHDR, H5E_CANTINIT, UFAIL, "object header message is too large (16k max)"); /* Allocate space in the object headed for the message */ - if ((ret_value = H5O_alloc(f, oh, orig_type, size, oh_flags_ptr)) == UFAIL) + if ((ret_value = H5O_alloc(f, dxpl_id, oh, + orig_type, size, oh_flags_ptr)) == UFAIL) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, UFAIL, "unable to allocate space for message"); /* Increment any links in message */ @@ -2346,10 +2348,18 @@ done: * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/20/05 + * Added dxpl_id parameter needed by the revised version of + * H5O_alloc(). + * *------------------------------------------------------------------------- */ herr_t -H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr) +H5O_touch_oh(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + hbool_t force, + unsigned * oh_flags_ptr) { unsigned idx; #ifdef H5_HAVE_GETTIMEOFDAY @@ -2382,7 +2392,9 @@ H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force, unsigned * oh_flags_ptr) if (!force) HGOTO_DONE(SUCCEED); /*nothing to do*/ size = (H5O_MTIME_NEW->raw_size)(f, &now); - if ((idx=H5O_alloc(f, oh, H5O_MTIME_NEW, size, oh_flags_ptr))==UFAIL) + + if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_MTIME_NEW, + size, oh_flags_ptr))==UFAIL) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for modification time message"); } @@ -2442,7 +2454,7 @@ H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id) HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header"); /* Create/Update the modification time message */ - if (H5O_touch_oh(ent->file, oh, force, &oh_flags)<0) + if (H5O_touch_oh(ent->file, dxpl_id, oh, force, &oh_flags)<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object modificaton time"); done: @@ -2473,10 +2485,13 @@ done: * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/20/05 + * Added dxpl_id parameter needed by call to H5O_alloc(). + * *------------------------------------------------------------------------- */ herr_t -H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) +H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, H5O_t *oh, hbool_t * oh_flags_ptr) { int idx; size_t size; @@ -2496,7 +2511,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) /* Create a new message */ if (idx==oh->nmesgs) { size = (H5O_BOGUS->raw_size)(f, NULL); - if ((idx=H5O_alloc(f, oh, H5O_BOGUS, size, oh_flags_ptr))<0) + if ((idx=H5O_alloc(f, dxpl_id, oh, H5O_BOGUS, size, oh_flags_ptr))<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to allocate space for 'bogus' message"); /* Allocate the native message in memory */ @@ -2507,6 +2522,7 @@ H5O_bogus_oh(H5F_t *f, H5O_t *oh, hbool_t * oh_flags_ptr) ((H5O_bogus_t *)(oh->mesg[idx].native))->u = H5O_BOGUS_VALUE; /* Mark the message and object header as dirty */ + *oh_flags_ptr = TRUE; oh->mesg[idx].dirty = TRUE; oh->dirty = TRUE; } /* end if */ @@ -2558,7 +2574,7 @@ H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id) HGOTO_ERROR(H5E_OHDR, H5E_CANTLOAD, FAIL, "unable to load object header"); /* Create the "bogus" message */ - if (H5O_bogus_oh(ent->file, oh, &oh_flags)<0) + if (H5O_bogus_oh(ent->file, dxpl_id, oh, &oh_flags)<0) HGOTO_ERROR(H5E_OHDR, H5E_CANTINIT, FAIL, "unable to update object 'bogus' message"); done: @@ -2816,71 +2832,115 @@ done: } /* end H5O_remove_real() */ -/*------------------------------------------------------------------------- - * Function: H5O_alloc_extend_chunk +/*------------------------------------------------------------------------- * - * Purpose: Extends a chunk which hasn't been allocated on disk yet - * to make the chunk large enough to contain a message whose - * data size is exactly SIZE bytes (SIZE need not be aligned). + * Function: H5O_alloc_extend_chunk * - * If the last message of the chunk is the null message, then - * that message will be extended with the chunk. Otherwise a - * new null message is created. + * Purpose: Attempt to extend a chunk that is allocated on disk. * - * F is the file in which the chunk will be written. It is - * included to ensure that there is enough space to extend - * this chunk. + * If the extension is successful, and if the last message + * of the chunk is the null message, then that message will + * be extended with the chunk. Otherwise a new null message + * is created. * - * Return: Success: Message index for null message which - * is large enough to hold SIZE bytes. + * f is the file in which the chunk will be written. It is + * included to ensure that there is enough space to extend + * this chunk. * - * Failure: Negative + * Return: TRUE: The chunk has been extended, and *msg_idx + * contains the message index for null message + * which is large enough to hold size bytes. * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 7 1997 + * FALSE: The chunk cannot be extended, and *msg_idx + * is undefined. + * + * FAIL: Some internal error has been detected. + * + * Programmer: John Mainzer -- 8/16/05 * * Modifications: - * Robb Matzke, 1999-08-26 - * If new memory is allocated as a multiple of some alignment - * then we're careful to initialize the part of the new memory - * from the end of the expected message to the end of the new - * memory. + * *------------------------------------------------------------------------- */ -static unsigned -H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) +static htri_t +H5O_alloc_extend_chunk(H5F_t *f, + H5O_t *oh, + unsigned chunkno, + size_t size, + unsigned * msg_idx) { - unsigned u; - unsigned idx; - size_t delta, old_size; - size_t aligned_size = H5O_ALIGN(size); - uint8_t *old_addr; - unsigned ret_value; + unsigned u; + unsigned idx; + unsigned i; + size_t delta, old_size; + size_t aligned_size = H5O_ALIGN(size); + uint8_t *old_addr; + herr_t result; + htri_t tri_result; + htri_t ret_value; /* return value */ + hbool_t cont_updated; FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_extend_chunk); /* check args */ - assert(oh); - assert(chunkno < oh->nchunks); - assert(size > 0); + HDassert( f != NULL ); + HDassert( oh != NULL ); + HDassert( chunkno < oh->nchunks ); + HDassert( size > 0 ); + HDassert( msg_idx != NULL ); - if (H5F_addr_defined(oh->chunk[chunkno].addr)) - HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "chunk is on disk"); + if ( !H5F_addr_defined(oh->chunk[chunkno].addr) ) { + + HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, FAIL, "chunk isn't on disk"); + } + + /* Test to see if the specified chunk ends with a null messages. If + * it does, try to extend the chunk and (thereby) the null message. + * If successful, return the index of the the null message in *msg_idx. + */ + for ( idx = 0; idx < oh->nmesgs; idx++ ) + { + if (oh->mesg[idx].chunkno==chunkno) { - /* try to extend a null message */ - for (idx=0; idx<oh->nmesgs; idx++) { - if (oh->mesg[idx].chunkno==chunkno) { if (H5O_NULL_ID == oh->mesg[idx].type->id && (oh->mesg[idx].raw + oh->mesg[idx].raw_size == oh->chunk[chunkno].image + oh->chunk[chunkno].size)) { - delta = MAX (H5O_MIN_SIZE, aligned_size - oh->mesg[idx].raw_size); - assert (delta=H5O_ALIGN (delta)); + delta = MAX(H5O_MIN_SIZE, + aligned_size - oh->mesg[idx].raw_size); + + HDassert( delta == H5O_ALIGN(delta) ); + + /* determine whether the chunk can be extended */ + tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( tri_result == FALSE ) { /* can't extend -- we are done */ + + HGOTO_DONE(FALSE); + + } else if ( tri_result != TRUE ) { /* system error */ - /* Reserve space in the file to hold the increased chunk size */ - if( H5MF_reserve(f, (hsize_t)delta) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file"); + HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_can_extend() failed"); + } + + /* if we get this far, we should be able to extend the chunk */ + result = H5MF_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( result < 0 ) { /* system error */ + + HGOTO_ERROR (H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_extend() failed."); + } + + + /* chunk size has been increased -- tidy up */ oh->mesg[idx].dirty = TRUE; oh->mesg[idx].raw_size += delta; @@ -2888,12 +2948,19 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) old_addr = oh->chunk[chunkno].image; /* Be careful not to indroduce garbage */ - oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr, - (oh->chunk[chunkno].size + delta)); - if (NULL==oh->chunk[chunkno].image) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + oh->chunk[chunkno].image = + H5FL_BLK_REALLOC(chunk_image,old_addr, + (oh->chunk[chunkno].size + delta)); + + if ( NULL == oh->chunk[chunkno].image ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + HDmemset(oh->chunk[chunkno].image + oh->chunk[chunkno].size, 0, delta); + oh->chunk[chunkno].size += delta; /* adjust raw addresses for messages of this chunk */ @@ -2904,35 +2971,106 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) (oh->mesg[u].raw - old_addr); } } - HGOTO_DONE(idx); + + /* adjust the continuation message pointing to this chunk + * for the increase in chunk size. + * + * As best I understand the code, it is not necessarily an + * error if there is no continuation message pointing to a + * chunk -- for example, chunk 0 seems to be pointed to by + * the object header. + */ + cont_updated = FALSE; + for ( i = 0; i < oh->nmesgs; i++ ) + { + if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) && + ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno + == chunkno ) ) { + + HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size + == oh->chunk[chunkno].size - delta ); + + ((H5O_cont_t *)(oh->mesg[i].native))->size = + oh->chunk[chunkno].size; + + cont_updated = TRUE; + + /* there should be at most one continuation message + * pointing to this chunk, so we can quit when we find + * and update it. + */ + break; + } + } + HDassert( ( chunkno == 0 ) || ( cont_updated ) ); + + *msg_idx = idx; + HGOTO_DONE(TRUE); } } /* end if */ - } + } /* end for */ - /* Reserve space in the file */ + /* if we get this far, the specified chunk does not end in a null message. + * Attempt to extend the chunk, and if successful, fill the new section + * of the chunk with a null messages. + */ + + /* compute space needed in the file */ delta = MAX(H5O_MIN_SIZE, aligned_size+H5O_SIZEOF_MSGHDR(f)); delta = H5O_ALIGN(delta); - if( H5MF_reserve(f, (hsize_t)delta) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file"); + /* determine whether the chunk can be extended */ + tri_result = H5MF_can_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( tri_result == FALSE ) { /* can't extend -- we are done */ + + HGOTO_DONE(FALSE); + + } else if ( tri_result != TRUE ) { /* system error */ + + HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, "H5MF_can_extend() failed"); + } + + /* if we get this far, we should be able to extend the chunk */ + result = H5MF_extend(f, H5FD_MEM_OHDR, + oh->chunk[chunkno].addr, + (hsize_t)(oh->chunk[chunkno].size), + (hsize_t)delta); + + if ( result < 0 ) { /* system error */ + + HGOTO_ERROR(H5E_RESOURCE, H5E_SYSTEM, FAIL, \ + "H5MF_extend() failed"); + } + /* create a new null message */ - if (oh->nmesgs >= oh->alloc_nmesgs) { + if ( oh->nmesgs >= oh->alloc_nmesgs ) { + unsigned na = oh->alloc_nmesgs + H5O_NMESGS; + H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); - if (NULL==x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( NULL == x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed"); + } oh->alloc_nmesgs = na; oh->mesg = x; } + idx = oh->nmesgs++; + oh->mesg[idx].type = H5O_NULL; oh->mesg[idx].dirty = TRUE; oh->mesg[idx].native = NULL; oh->mesg[idx].raw = oh->chunk[chunkno].image + - oh->chunk[chunkno].size + - H5O_SIZEOF_MSGHDR(f); + oh->chunk[chunkno].size + + H5O_SIZEOF_MSGHDR(f); oh->mesg[idx].raw_size = delta - H5O_SIZEOF_MSGHDR(f); oh->mesg[idx].chunkno = chunkno; @@ -2940,74 +3078,125 @@ H5O_alloc_extend_chunk(H5F_t *f, H5O_t *oh, unsigned chunkno, size_t size) old_size = oh->chunk[chunkno].size; oh->chunk[chunkno].size += delta; oh->chunk[chunkno].image = H5FL_BLK_REALLOC(chunk_image,old_addr, - oh->chunk[chunkno].size); - if (NULL==oh->chunk[chunkno].image) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + oh->chunk[chunkno].size); + if (NULL==oh->chunk[chunkno].image) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \ + "memory allocation failed"); + } + HDmemset(oh->chunk[chunkno].image+old_size, 0, oh->chunk[chunkno].size - old_size); /* adjust raw addresses for messages of this chunk */ if (old_addr != oh->chunk[chunkno].image) { - for (u = 0; u < oh->nmesgs; u++) { - if (oh->mesg[u].chunkno == chunkno) - oh->mesg[u].raw = oh->chunk[chunkno].image + - (oh->mesg[u].raw - old_addr); - } + for (u = 0; u < oh->nmesgs; u++) { + if (oh->mesg[u].chunkno == chunkno) + oh->mesg[u].raw = oh->chunk[chunkno].image + + (oh->mesg[u].raw - old_addr); + } + } + + /* adjust the continuation message pointing to this chunk for the + * increase in chunk size. + * + * As best I understand the code, it is not necessarily an error + * if there is no continuation message pointing to a chunk -- for + * example, chunk 0 seems to be pointed to by the object header. + */ + cont_updated = FALSE; + for ( i = 0; i < oh->nmesgs; i++ ) + { + if ( ( H5O_CONT_ID == oh->mesg[i].type->id ) && + ( ((H5O_cont_t *)(oh->mesg[i].native))->chunkno == chunkno ) ) { + + HDassert( ((H5O_cont_t *)(oh->mesg[i].native))->size == + oh->chunk[chunkno].size - delta ); + + ((H5O_cont_t *)(oh->mesg[i].native))->size = + oh->chunk[chunkno].size; + + cont_updated = TRUE; + + /* there should be at most one continuation message + * pointing to this chunk, so we can quit when we find + * and update it. + */ + break; + } } + HDassert( ( chunkno == 0 ) || ( cont_updated ) ); /* Set return value */ - ret_value=idx; + *msg_idx = idx; + ret_value = TRUE; done: + FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc_extend_chunk() */ /*------------------------------------------------------------------------- - * Function: H5O_alloc_new_chunk + * Function: H5O_alloc_new_chunk * - * Purpose: Allocates a new chunk for the object header but doen't - * give the new chunk a file address yet. One of the other - * chunks will get an object continuation message. If there - * isn't room in any other chunk for the object continuation - * message, then some message from another chunk is moved into - * this chunk to make room. + * Purpose: Allocates a new chunk for the object header, including + * file space. * - * SIZE need not be aligned. + * One of the other chunks will get an object continuation + * message. If there isn't room in any other chunk for the + * object continuation message, then some message from + * another chunk is moved into this chunk to make room. * - * Return: Success: Index number of the null message for the - * new chunk. The null message will be at - * least SIZE bytes not counting the message - * ID or size fields. + * SIZE need not be aligned. * - * Failure: Negative + * Return: Success: Index number of the null message for the + * new chunk. The null message will be at + * least SIZE bytes not counting the message + * ID or size fields. * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 7 1997 + * Failure: Negative + * + * Programmer: Robb Matzke + * matzke@llnl.gov + * Aug 7 1997 * * Modifications: * + * John Mainzer, 8/17/05 + * Reworked function to allocate file space immediately, + * instead of just allocating core space (as it used to). + * This change was necessary, as we were allocating file + * space on metadata cache eviction, which need not be + * synchronized across all processes. As a result, + * different processes were allocating different file + * locations to the same chunk. + * *------------------------------------------------------------------------- */ static unsigned -H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) +H5O_alloc_new_chunk(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + size_t size) { - size_t cont_size; /*continuation message size */ - int found_null = (-1); /*best fit null message */ - int found_other = (-1); /*best fit other message */ - unsigned idx; /*message number */ - uint8_t *p = NULL; /*ptr into new chunk */ - H5O_cont_t *cont = NULL; /*native continuation message */ - int chunkno; - unsigned u; - unsigned ret_value; /*return value */ + size_t cont_size; /*continuation message size */ + int found_null = (-1); /*best fit null message */ + int found_other = (-1); /*best fit other message */ + unsigned idx; /*message number */ + uint8_t *p = NULL; /*ptr into new chunk */ + H5O_cont_t *cont = NULL; /*native continuation message */ + int chunkno; + unsigned u; + unsigned ret_value; /*return value */ + haddr_t new_chunk_addr; FUNC_ENTER_NOAPI_NOINIT(H5O_alloc_new_chunk); /* check args */ - assert (oh); - assert (size > 0); + HDassert (oh); + HDassert (size > 0); size = H5O_ALIGN(size); /* @@ -3018,23 +3207,23 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) */ cont_size = H5O_ALIGN (H5F_SIZEOF_ADDR(f) + H5F_SIZEOF_SIZE(f)); for (u=0; u<oh->nmesgs; u++) { - if (H5O_NULL_ID == oh->mesg[u].type->id) { - if (cont_size == oh->mesg[u].raw_size) { - found_null = u; - break; - } else if (oh->mesg[u].raw_size >= cont_size && - (found_null < 0 || - (oh->mesg[u].raw_size < - oh->mesg[found_null].raw_size))) { - found_null = u; - } - } else if (H5O_CONT_ID == oh->mesg[u].type->id) { - /*don't consider continuation messages */ - } else if (oh->mesg[u].raw_size >= cont_size && - (found_other < 0 || - oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) { - found_other = u; - } + if (H5O_NULL_ID == oh->mesg[u].type->id) { + if (cont_size == oh->mesg[u].raw_size) { + found_null = u; + break; + } else if (oh->mesg[u].raw_size >= cont_size && + (found_null < 0 || + (oh->mesg[u].raw_size < + oh->mesg[found_null].raw_size))) { + found_null = u; + } + } else if (H5O_CONT_ID == oh->mesg[u].type->id) { + /*don't consider continuation messages */ + } else if (oh->mesg[u].raw_size >= cont_size && + (found_other < 0 || + oh->mesg[u].raw_size < oh->mesg[found_other].raw_size)) { + found_other = u; + } } assert(found_null >= 0 || found_other >= 0); @@ -3043,36 +3232,47 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) * message, then make sure the new chunk has enough room for that * other message. */ - if (found_null < 0) - size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + if ( found_null < 0 ) { + + size += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + } /* * The total chunk size must include the requested space plus enough - * for the message header. This must be at least some minimum and a + * for the message header. This must be at least some minimum and a * multiple of the alignment size. */ size = MAX(H5O_MIN_SIZE, size + H5O_SIZEOF_MSGHDR(f)); assert (size == H5O_ALIGN (size)); - /* Reserve space in the file to hold the new chunk */ - if( H5MF_reserve(f, (hsize_t)size) < 0 ) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, "unable to reserve space in file for new chunk"); + /* allocate space in file to hold the new chunk */ + new_chunk_addr = H5MF_alloc(f, H5FD_MEM_OHDR, dxpl_id, (hsize_t)size); + if ( HADDR_UNDEF == new_chunk_addr ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "unable to allocate space for new chunk"); + } /* - * Create the new chunk without giving it a file address. + * Create the new chunk giving it a file address. */ - if (oh->nchunks >= oh->alloc_nchunks) { + if ( oh->nchunks >= oh->alloc_nchunks ) { + unsigned na = oh->alloc_nchunks + H5O_NCHUNKS; H5O_chunk_t *x = H5FL_SEQ_REALLOC (H5O_chunk_t, oh->chunk, (size_t)na); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( !x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } oh->alloc_nchunks = na; oh->chunk = x; } + chunkno = oh->nchunks++; oh->chunk[chunkno].dirty = TRUE; - oh->chunk[chunkno].addr = HADDR_UNDEF; + oh->chunk[chunkno].addr = new_chunk_addr; oh->chunk[chunkno].size = size; if (NULL==(oh->chunk[chunkno].image = p = H5FL_BLK_CALLOC(chunk_image,size))) HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); @@ -3086,35 +3286,40 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) unsigned na = oh->alloc_nmesgs + MAX (H5O_NMESGS, 3); H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); + if ( !x ) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } oh->alloc_nmesgs = na; oh->mesg = x; /* Set new object header info to zeros */ HDmemset(&oh->mesg[old_alloc], 0, - (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); } /* * Describe the messages of the new chunk. */ if (found_null < 0) { - found_null = u = oh->nmesgs++; - oh->mesg[u].type = H5O_NULL; - oh->mesg[u].dirty = TRUE; - oh->mesg[u].native = NULL; - oh->mesg[u].raw = oh->mesg[found_other].raw; - oh->mesg[u].raw_size = oh->mesg[found_other].raw_size; - oh->mesg[u].chunkno = oh->mesg[found_other].chunkno; - - oh->mesg[found_other].dirty = TRUE; + found_null = u = oh->nmesgs++; + oh->mesg[u].type = H5O_NULL; + oh->mesg[u].dirty = TRUE; + oh->mesg[u].native = NULL; + oh->mesg[u].raw = oh->mesg[found_other].raw; + oh->mesg[u].raw_size = oh->mesg[found_other].raw_size; + oh->mesg[u].chunkno = oh->mesg[found_other].chunkno; + + oh->mesg[found_other].dirty = TRUE; /* Copy the message to the new location */ - HDmemcpy(p+H5O_SIZEOF_MSGHDR(f),oh->mesg[found_other].raw,oh->mesg[found_other].raw_size); - oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f); - oh->mesg[found_other].chunkno = chunkno; - p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; - size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + HDmemcpy(p + H5O_SIZEOF_MSGHDR(f), + oh->mesg[found_other].raw, + oh->mesg[found_other].raw_size); + oh->mesg[found_other].raw = p + H5O_SIZEOF_MSGHDR(f); + oh->mesg[found_other].chunkno = chunkno; + p += H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; + size -= H5O_SIZEOF_MSGHDR(f) + oh->mesg[found_other].raw_size; } idx = oh->nmesgs++; oh->mesg[idx].type = H5O_NULL; @@ -3130,19 +3335,19 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) * two null messages. */ if (oh->mesg[found_null].raw_size > cont_size) { - u = oh->nmesgs++; - oh->mesg[u].type = H5O_NULL; - oh->mesg[u].dirty = TRUE; - oh->mesg[u].native = NULL; - oh->mesg[u].raw = oh->mesg[found_null].raw + - cont_size + - H5O_SIZEOF_MSGHDR(f); - oh->mesg[u].raw_size = oh->mesg[found_null].raw_size - - (cont_size + H5O_SIZEOF_MSGHDR(f)); - oh->mesg[u].chunkno = oh->mesg[found_null].chunkno; - - oh->mesg[found_null].dirty = TRUE; - oh->mesg[found_null].raw_size = cont_size; + u = oh->nmesgs++; + oh->mesg[u].type = H5O_NULL; + oh->mesg[u].dirty = TRUE; + oh->mesg[u].native = NULL; + oh->mesg[u].raw = oh->mesg[found_null].raw + + cont_size + + H5O_SIZEOF_MSGHDR(f); + oh->mesg[u].raw_size = oh->mesg[found_null].raw_size - + (cont_size + H5O_SIZEOF_MSGHDR(f)); + oh->mesg[u].chunkno = oh->mesg[found_null].chunkno; + + oh->mesg[found_null].dirty = TRUE; + oh->mesg[found_null].raw_size = cont_size; } /* @@ -3150,10 +3355,13 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) */ oh->mesg[found_null].type = H5O_CONT; oh->mesg[found_null].dirty = TRUE; - if (NULL==(cont = H5FL_MALLOC(H5O_cont_t))) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); - cont->addr = HADDR_UNDEF; - cont->size = 0; + if (NULL==(cont = H5FL_MALLOC(H5O_cont_t))) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + cont->addr = oh->chunk[chunkno].addr; + cont->size = oh->chunk[chunkno].size; cont->chunkno = chunkno; oh->mesg[found_null].native = cont; @@ -3162,59 +3370,75 @@ H5O_alloc_new_chunk(H5F_t *f, H5O_t *oh, size_t size) done: FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc_new_chunk() */ /*------------------------------------------------------------------------- - * Function: H5O_alloc + * Function: H5O_alloc * - * Purpose: Allocate enough space in the object header for this message. + * Purpose: Allocate enough space in the object header for this message. * - * Return: Success: Index of message + * Return: Success: Index of message * - * Failure: Negative + * Failure: Negative * - * Programmer: Robb Matzke - * matzke@llnl.gov - * Aug 6 1997 + * Programmer: Robb Matzke + * matzke@llnl.gov + * Aug 6 1997 * * Modifications: * * John Mainzer, 6/7/05 * Modified function to use the new dirtied parameter to - * H5AC_unprotect() instead of modfying the is_dirty field. + * H5AC_unprotect() instead of modfying the is_dirty field directly. * In this case, that requires the addition of the oh_dirtied_ptr * parameter to track whether *oh is dirty. * + * John Mainzer, 8/19/05 + * Reworked the function to allocate disk space immediately instead + * of waiting to cache eviction time. This is necessary since cache + * evictions need not be synchronized across the processes in the + * PHDF5 case. + * + * Note the use of a revised versions of H5O_alloc_new_chunk() and + * H5O_alloc_extend_chunk(). + * *------------------------------------------------------------------------- */ static unsigned -H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * oh_flags_ptr) +H5O_alloc(H5F_t *f, + hid_t dxpl_id, + H5O_t *oh, + const H5O_class_t *type, + size_t size, + unsigned * oh_flags_ptr) { - unsigned idx; + unsigned idx = UFAIL; H5O_mesg_t *msg; /* Pointer to newly allocated message */ - size_t aligned_size = H5O_ALIGN(size); - unsigned ret_value; /* Return value */ + size_t aligned_size = H5O_ALIGN(size); + htri_t tri_result; + unsigned ret_value; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5O_alloc); /* check args */ - assert (oh); - assert (type); - assert (oh_flags_ptr); + HDassert (oh); + HDassert (type); + HDassert (oh_flags_ptr); /* look for a null message which is large enough */ for (idx = 0; idx < oh->nmesgs; idx++) { - if (H5O_NULL_ID == oh->mesg[idx].type->id && + if (H5O_NULL_ID == oh->mesg[idx].type->id && oh->mesg[idx].raw_size >= aligned_size) - break; + break; } #ifdef LATER /* * Perhaps if we join adjacent null messages we could make one * large enough... we leave this as an exercise for future - * programmers :-) This isn't a high priority because when an + * programmers :-) This isn't a high priority because when an * object header is read from disk the null messages are combined * anyway. */ @@ -3222,63 +3446,92 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * /* if we didn't find one, then allocate more header space */ if (idx >= oh->nmesgs) { - unsigned chunkno; + unsigned chunkno; - /* - * Look for a chunk which hasn't had disk space allocated yet - * since we can just increase the size of that chunk. - */ - for (chunkno = 0; chunkno < oh->nchunks; chunkno++) { - if ((idx = H5O_alloc_extend_chunk(f, oh, chunkno, size)) != UFAIL) { - break; - } - H5E_clear_stack(NULL); - } + /* check to see if we can extend one of the chunks. If we can, + * do so. Otherwise, we will have to allocate a new chunk. + * + * Note that in this new version of this function, all chunks + * must have file space allocated to them. + */ + for ( chunkno = 0; chunkno < oh->nchunks; chunkno++ ) + { + HDassert( H5F_addr_defined(oh->chunk[chunkno].addr) ); - /* - * Create a new chunk - */ - if (idx == UFAIL) { - if ((idx = H5O_alloc_new_chunk(f, oh, size)) == UFAIL) - HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, "unable to create a new object header data chunk"); - } + tri_result = H5O_alloc_extend_chunk(f, oh, chunkno, size, &idx); + + if ( tri_result == TRUE ) { + + break; + + } else if ( tri_result == FALSE ) { + + idx = UFAIL; + + } else { + + HGOTO_ERROR(H5E_OHDR, H5E_SYSTEM, UFAIL, \ + "H5O_alloc_extend_chunk failed unexpectedly"); + } + } + + /* if idx is still UFAIL, we were not able to extend a chunk. + * Create a new one. + */ + if (idx == UFAIL) { + + if ( (idx = H5O_alloc_new_chunk(f, dxpl_id, oh, size)) == UFAIL ) { + + HGOTO_ERROR(H5E_OHDR, H5E_NOSPACE, UFAIL, \ + "unable to create a new object header data chunk"); + } + } } /* Set pointer to newly allocated message */ - msg=&oh->mesg[idx]; + msg = &(oh->mesg[idx]); /* do we need to split the null message? */ if (msg->raw_size > aligned_size) { + H5O_mesg_t *null_msg; /* Pointer to null message */ - size_t mesg_size = aligned_size+ H5O_SIZEOF_MSGHDR(f); /* Total size of newly allocated message */ - assert(msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f)); + size_t mesg_size = aligned_size + H5O_SIZEOF_MSGHDR(f); + /* Total size of newly allocated message */ - if (oh->nmesgs >= oh->alloc_nmesgs) { - int old_alloc=oh->alloc_nmesgs; - unsigned na = oh->alloc_nmesgs + H5O_NMESGS; - H5O_mesg_t *x = H5FL_SEQ_REALLOC (H5O_mesg_t, oh->mesg, (size_t)na); + HDassert( msg->raw_size - aligned_size >= H5O_SIZEOF_MSGHDR(f) ); - if (!x) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, UFAIL, "memory allocation failed"); - oh->alloc_nmesgs = na; - oh->mesg = x; + if (oh->nmesgs >= oh->alloc_nmesgs) { + + int old_alloc=oh->alloc_nmesgs; + unsigned na = oh->alloc_nmesgs + H5O_NMESGS; + H5O_mesg_t *x = H5FL_SEQ_REALLOC(H5O_mesg_t, oh->mesg, (size_t)na); - /* Set new object header info to zeros */ - HDmemset(&oh->mesg[old_alloc],0, - (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + if (!x) { - /* "Retarget" local 'msg' pointer into newly allocated array of messages */ + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, UFAIL, \ + "memory allocation failed"); + } + oh->alloc_nmesgs = na; + oh->mesg = x; + + /* Set new object header info to zeros */ + HDmemset(&oh->mesg[old_alloc],0, + (oh->alloc_nmesgs-old_alloc)*sizeof(H5O_mesg_t)); + + /* "Retarget" local 'msg' pointer into newly allocated array + * of messages + */ msg=&oh->mesg[idx]; - } - null_msg=&oh->mesg[oh->nmesgs++]; - null_msg->type = H5O_NULL; - null_msg->dirty = TRUE; - null_msg->native = NULL; - null_msg->raw = msg->raw + mesg_size; - null_msg->raw_size = msg->raw_size - mesg_size; - null_msg->chunkno = msg->chunkno; - msg->raw_size = aligned_size; + } + null_msg = &(oh->mesg[oh->nmesgs++]); + null_msg->type = H5O_NULL; + null_msg->dirty = TRUE; + null_msg->native = NULL; + null_msg->raw = msg->raw + mesg_size; + null_msg->raw_size = msg->raw_size - mesg_size; + null_msg->chunkno = msg->chunkno; + msg->raw_size = aligned_size; } /* initialize the new message */ @@ -3289,11 +3542,12 @@ H5O_alloc(H5F_t *f, H5O_t *oh, const H5O_class_t *type, size_t size, unsigned * *oh_flags_ptr |= H5AC__DIRTIED_FLAG; /* Set return value */ - ret_value=idx; + ret_value = idx; done: FUNC_LEAVE_NOAPI(ret_value); -} + +} /* H5O_alloc() */ #ifdef NOT_YET @@ -3956,7 +4210,7 @@ H5O_iterate_real(const H5G_entry_t *ent, const H5O_class_t *type, H5AC_protect_t if(oh_flags & H5AC__DIRTIED_FLAG) { /* Shouldn't be able to modify object header if we don't have write access */ HDassert(prot == H5AC_WRITE); - H5O_touch_oh(ent->file, oh, FALSE, &oh_flags); + H5O_touch_oh(ent->file, dxpl_id, oh, FALSE, &oh_flags); } /* end if */ done: diff --git a/src/H5Oprivate.h b/src/H5Oprivate.h index 8a9dd7d..e3043af 100644 --- a/src/H5Oprivate.h +++ b/src/H5Oprivate.h @@ -268,11 +268,11 @@ H5_DLL herr_t H5O_unprotect(H5G_entry_t *ent, struct H5O_t *oh, hid_t dxpl_id, H5_DLL int H5O_append(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, unsigned type_id, unsigned flags, const void *mesg, unsigned * oh_flags_ptr); H5_DLL herr_t H5O_touch(H5G_entry_t *ent, hbool_t force, hid_t dxpl_id); -H5_DLL herr_t H5O_touch_oh(H5F_t *f, struct H5O_t *oh, hbool_t force, - unsigned * oh_flags_ptr); +H5_DLL herr_t H5O_touch_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, + hbool_t force, unsigned * oh_flags_ptr); #ifdef H5O_ENABLE_BOGUS H5_DLL herr_t H5O_bogus(H5G_entry_t *ent, hid_t dxpl_id); -H5_DLL herr_t H5O_bogus_oh(H5F_t *f, struct H5O_t *oh, +H5_DLL herr_t H5O_bogus_oh(H5F_t *f, hid_t dxpl_id, struct H5O_t *oh, unsigned * oh_flags_ptr); #endif /* H5O_ENABLE_BOGUS */ H5_DLL herr_t H5O_remove(H5G_entry_t *ent, unsigned type_id, int sequence, |