Diffstat (limited to 'src')
-rw-r--r-- | src/H5AC.c | 100
-rw-r--r-- | src/H5ACmpio.c | 8
-rw-r--r-- | src/H5ACprivate.h | 6
-rw-r--r-- | src/H5C.c | 570
-rw-r--r-- | src/H5Cmpio.c | 276
-rw-r--r-- | src/H5Cpkg.h | 274
-rw-r--r-- | src/H5Cprivate.h | 5
-rw-r--r-- | src/H5D.c | 2
-rw-r--r-- | src/H5Dchunk.c | 23
-rw-r--r-- | src/H5Ddeprec.c | 3
-rw-r--r-- | src/H5F.c | 1
-rw-r--r-- | src/H5FDmpio.c | 78
-rw-r--r-- | src/H5Fint.c | 10
-rw-r--r-- | src/H5Fmpi.c | 29
-rw-r--r-- | src/H5Fpkg.h | 4
-rw-r--r-- | src/H5Fprivate.h | 2
-rw-r--r-- | src/H5Pdxpl.c | 19
-rw-r--r-- | src/H5Pfapl.c | 292
-rw-r--r-- | src/H5Pint.c | 27
-rw-r--r-- | src/H5Plapl.c | 14
-rw-r--r-- | src/H5Ppkg.h | 2
-rw-r--r-- | src/H5Pprivate.h | 8
-rw-r--r-- | src/H5Ppublic.h | 6
23 files changed, 1648 insertions, 111 deletions
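The hunks below wire collective metadata reads (rank 0 reads and MPI_Bcast's the entry image) and collective metadata writes (a skip list of dirty entries flushed with one MPI-IO collective write) into the internal DXPLs and the metadata cache. For orientation, the sketch below shows how an application would typically switch this behavior on; it assumes the HDF5 1.10+ public property calls (H5Pset_all_coll_metadata_ops, H5Pset_coll_metadata_write), which are not part of this patch — the diff itself only adds the internal plumbing in H5AC/H5C.

```c
/*
 * Illustrative only, not part of this patch: enabling the collective
 * metadata paths that the H5C/H5Cmpio changes below implement.
 * Assumes the HDF5 1.10+ public API; file/dataset names are placeholders.
 */
#include <mpi.h>
#include <hdf5.h>

int main(int argc, char *argv[])
{
    MPI_Init(&argc, &argv);

    /* File-access property list: open the file through the MPI-IO VFD */
    hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);

    /* Ask for collective metadata reads (rank 0 reads, then broadcasts)
     * and collective writes of dirty metadata entries at flush time.   */
    H5Pset_all_coll_metadata_ops(fapl, 1);
    H5Pset_coll_metadata_write(fapl, 1);

    hid_t file = H5Fcreate("coll_md.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);

    /* ... create groups/datasets as usual; metadata cache I/O now follows
     * the collective code paths added in H5C.c and H5Cmpio.c ...        */

    H5Fclose(file);
    H5Pclose(fapl);
    MPI_Finalize();
    return 0;
}
```

With these properties set, protects/loads issued under a DXPL carrying the collective-metadata-read flag take the MPI_Bcast path added to H5C_load_entry()/H5C_protect(), and candidate-list flushes funnel their entry images through H5C_collective_write().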
@@ -80,8 +80,15 @@ hbool_t H5_PKG_INIT_VAR = FALSE; /* Library Private Variables */ /*****************************/ -/* Default dataset transfer property list for metadata I/O calls */ -hid_t H5AC_dxpl_id = (-1); +/* Default dataset transfer property list for metadata I/O calls (coll write, ind read) */ +hid_t H5AC_ind_read_dxpl_id = (-1); +hid_t H5AC_dxpl_id; +#ifdef H5_HAVE_PARALLEL +/* collective metadata read property */ +hid_t H5AC_coll_read_dxpl_id = (-1); +#endif /* H5_HAVE_PARALLEL */ + +/* global flag for collective API sanity checks */ /* DXPL to be used in operations that will not result in I/O calls */ hid_t H5AC_noio_dxpl_id = (-1); @@ -174,8 +181,11 @@ done: herr_t H5AC__init_package(void) { -#ifdef H5_DEBUG_BUILD H5P_genplist_t *xfer_plist; /* Dataset transfer property list object */ +#ifdef H5_HAVE_PARALLEL + H5P_coll_md_read_flag_t coll_meta_read; +#endif /* H5_HAVE_PARALLEL */ +#ifdef H5_DEBUG_BUILD H5FD_dxpl_type_t dxpl_type; /* Property indicating the type of the internal dxpl */ #endif /* H5_DEBUG_BUILD */ herr_t ret_value = SUCCEED; /* Return value */ @@ -195,14 +205,18 @@ H5AC__init_package(void) } #endif /* H5_HAVE_PARALLEL */ -#ifdef H5_DEBUG_BUILD - /* Get an ID for the metadata (H5AC) dxpl */ - if((H5AC_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0) +#if defined(H5_HAVE_PARALLEL) || defined(H5_DEBUG_BUILD) + /* Get an ID for the internal independent metadata dxpl */ + if((H5AC_ind_read_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list") + + /* if this is a debug build, set the dxpl type flag on the + independent metadata dxpl and create the noio and raw data internal dxpls */ +#ifdef H5_DEBUG_BUILD /* Get the property list object */ - if (NULL == (xfer_plist = (H5P_genplist_t *)H5I_object(H5AC_dxpl_id))) + if (NULL == (xfer_plist = (H5P_genplist_t *)H5I_object(H5AC_ind_read_dxpl_id))) HGOTO_ERROR(H5E_CACHE, H5E_BADATOM, FAIL, "can't get new property list object") - /* Insert the dxpl type property */ + /* set metadata dxpl type */ dxpl_type = H5FD_METADATA_DXPL; if(H5P_set(xfer_plist, H5FD_DXPL_TYPE_NAME, &dxpl_type) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set dxpl type property") @@ -228,11 +242,40 @@ H5AC__init_package(void) dxpl_type = H5FD_RAWDATA_DXPL; if(H5P_set(xfer_plist, H5FD_DXPL_TYPE_NAME, &dxpl_type) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set dxpl type property") -#else /* H5_DEBUG_BUILD */ - H5AC_dxpl_id = H5P_DATASET_XFER_DEFAULT; +#endif /* H5_DEBUG_BUILD */ + + /* if this is a parallel build, create an internal dxpl for + collective metadata reads */ +#ifdef H5_HAVE_PARALLEL + /* Get an ID for H5AC_coll_read_dxpl_id */ + if((H5AC_coll_read_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list") + /* Get the property list object */ + if (NULL == (xfer_plist = (H5P_genplist_t *)H5I_object(H5AC_coll_read_dxpl_id))) + HGOTO_ERROR(H5E_CACHE, H5E_BADATOM, FAIL, "can't get new property list object") + /* set 'collective metadata read' property */ + coll_meta_read = H5P_USER_TRUE; + if(H5P_set(xfer_plist, H5_COLL_MD_READ_FLAG_NAME, &coll_meta_read) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata read flag") + + /* if we have a debug build, set the dxpl type to metadata on the + collective metadata dxpl */ +#ifdef H5_DEBUG_BUILD + /* set metadata dxpl type */ + dxpl_type = H5FD_METADATA_DXPL; + 
if(H5P_set(xfer_plist, H5FD_DXPL_TYPE_NAME, &dxpl_type) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set dxpl type property") +#endif /* H5_DEBUG_BUILD */ +#endif /* H5_HAVE_PARALLEL */ + +#else /* defined(H5_HAVE_PARALLEL) || defined(H5_DEBUG_BUILD) */ + H5AC_ind_read_dxpl_id = H5P_DATASET_XFER_DEFAULT; H5AC_noio_dxpl_id = H5P_DATASET_XFER_DEFAULT; H5AC_rawdata_dxpl_id = H5P_DATASET_XFER_DEFAULT; -#endif /* H5_DEBUG_BUILD */ +#endif /* defined(H5_HAVE_PARALLEL) || defined(H5_DEBUG_BUILD) */ + + /* MSC - Temp. Remove later */ + H5AC_dxpl_id = H5AC_ind_read_dxpl_id; done: FUNC_LEAVE_NOAPI(ret_value) @@ -261,21 +304,33 @@ H5AC_term_package(void) FUNC_ENTER_NOAPI_NOINIT_NOERR if(H5_PKG_INIT_VAR) { - if(H5AC_dxpl_id > 0 || H5AC_noio_dxpl_id > 0 || H5AC_rawdata_dxpl_id > 0) { -#ifdef H5_DEBUG_BUILD + if(H5AC_ind_read_dxpl_id > 0 || H5AC_noio_dxpl_id > 0 || H5AC_rawdata_dxpl_id > 0 +#ifdef H5_HAVE_PARALLEL + || H5AC_coll_read_dxpl_id > 0 +#endif /* H5_HAVE_PARALLEL */ + ) { +#if defined(H5_HAVE_PARALLEL) || defined(H5_DEBUG_BUILD) /* Indicate more work to do */ n = 1; /* H5I */ - /* Close H5AC dxpl */ - if(H5I_dec_ref(H5AC_dxpl_id) < 0 || + /* Close H5AC dxpls */ + if(H5I_dec_ref(H5AC_ind_read_dxpl_id) < 0 || H5I_dec_ref(H5AC_noio_dxpl_id) < 0 || - H5I_dec_ref(H5AC_rawdata_dxpl_id) < 0) + H5I_dec_ref(H5AC_rawdata_dxpl_id) < 0 +#ifdef H5_HAVE_PARALLEL + || H5I_dec_ref(H5AC_coll_read_dxpl_id) < 0 +#endif /* H5_HAVE_PARALLEL */ + ) H5E_clear_stack(NULL); /*ignore error*/ -#endif /* H5_DEBUG_BUILD */ +#endif /* defined(H5_HAVE_PARALLEL) || defined(H5_DEBUG_BUILD) */ + /* Reset static IDs */ - H5AC_dxpl_id = (-1); + H5AC_ind_read_dxpl_id = (-1); H5AC_noio_dxpl_id = (-1); H5AC_rawdata_dxpl_id = (-1); +#ifdef H5_HAVE_PARALLEL + H5AC_coll_read_dxpl_id = (-1); +#endif /* H5_HAVE_PARALLEL */ } /* end if */ /* Reset interface initialization flag */ @@ -491,7 +546,12 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id) #endif /* H5AC__TRACE_FILE_ENABLED */ #ifdef H5_HAVE_PARALLEL + /* destroying the cache, so clear all collective entries */ + if(H5C_clear_coll_entries(f->shared->cache, 0) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.") + aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(f->shared->cache); + if(aux_ptr) /* Sanity check */ HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); @@ -629,6 +689,10 @@ H5AC_flush(H5F_t *f, hid_t dxpl_id) #endif /* H5AC__TRACE_FILE_ENABLED */ #ifdef H5_HAVE_PARALLEL + /* flushing the cache, so clear all collective entries */ + if(H5C_clear_coll_entries(f->shared->cache, 0) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.") + /* Attempt to flush all entries from rank 0 & Bcast clean list to other ranks */ if(H5AC__flush_entries(f, dxpl_id) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c index 79b0b54..353805d 100644 --- a/src/H5ACmpio.c +++ b/src/H5ACmpio.c @@ -2065,6 +2065,14 @@ HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu aux_ptr->rename_dirty_bytes_updates); #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ + /* clear collective access flag on half of the entries in the + cache and mark them as independent in case they need to be + evicted later. All ranks are guranteed to mark the same entires + since we don't modify the order of the collectively accessed + entries except through collective access. 
*/ + if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.") + switch(aux_ptr->metadata_write_strategy) { case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY: switch(sync_point_op) { diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h index f441854..7481f0a 100644 --- a/src/H5ACprivate.h +++ b/src/H5ACprivate.h @@ -184,7 +184,7 @@ typedef H5C_t H5AC_t; #ifdef H5_HAVE_PARALLEL /* Definitions for "collective metadata write" property */ #define H5AC_COLLECTIVE_META_WRITE_NAME "H5AC_collective_metadata_write" -#define H5AC_COLLECTIVE_META_WRITE_SIZE sizeof(unsigned) +#define H5AC_COLLECTIVE_META_WRITE_SIZE sizeof(hbool_t) #define H5AC_COLLECTIVE_META_WRITE_DEF 0 #endif /* H5_HAVE_PARALLEL */ @@ -196,6 +196,10 @@ typedef H5C_t H5AC_t; /* Dataset transfer property list for metadata calls */ H5_DLLVAR hid_t H5AC_dxpl_id; +extern hid_t H5AC_ind_read_dxpl_id; +#ifdef H5_HAVE_PARALLEL +extern hid_t H5AC_coll_read_dxpl_id; +#endif /* H5_HAVE_PARALLEL */ /* DXPL to be used in operations that will not result in I/O calls */ extern hid_t H5AC_noio_dxpl_id; @@ -88,7 +88,6 @@ #include "H5Cpkg.h" /* Cache */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ -#include "H5FDprivate.h" /* File drivers */ #include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ #include "H5MFprivate.h" /* File memory management */ @@ -156,6 +155,9 @@ static herr_t H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring, static void * H5C_load_entry(H5F_t * f, hid_t dxpl_id, +#ifdef H5_HAVE_PARALLEL + hbool_t coll_access, +#endif /* H5_HAVE_PARALLEL */ const H5C_class_t * type, haddr_t addr, void * udata); @@ -180,6 +182,9 @@ static herr_t H5C_mark_tagged_entries(H5C_t * cache_ptr, static herr_t H5C_flush_marked_entries(H5F_t * f, hid_t dxpl_id); +static herr_t H5C__generate_image(H5F_t *f, H5C_t * cache_ptr, H5C_cache_entry_t *entry_ptr, + hid_t dxpl_id, int64_t *entry_size_change_ptr); + #if H5C_DO_TAGGING_SANITY_CHECKS static herr_t H5C_verify_tag(int id, haddr_t tag); #endif @@ -549,6 +554,13 @@ H5C_create(size_t max_cache_size, cache_ptr->LRU_head_ptr = NULL; cache_ptr->LRU_tail_ptr = NULL; +#ifdef H5_HAVE_PARALLEL + cache_ptr->coll_list_len = 0; + cache_ptr->coll_list_size = (size_t)0; + cache_ptr->coll_head_ptr = NULL; + cache_ptr->coll_tail_ptr = NULL; +#endif /* H5_HAVE_PARALLEL */ + cache_ptr->cLRU_list_len = 0; cache_ptr->cLRU_list_size = (size_t)0; cache_ptr->cLRU_head_ptr = NULL; @@ -970,6 +982,12 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is protected.") if(entry_ptr->is_pinned) HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is pinned.") +#ifdef H5_HAVE_PARALLEL + if(entry_ptr->coll_access) { + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ /* If we get this far, call H5C__flush_single_entry() with the * H5C__FLUSH_INVALIDATE_FLAG and the H5C__FLUSH_CLEAR_ONLY_FLAG. 
@@ -987,8 +1005,8 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, /* Delete the entry from the skip list on destroy */ flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "can't flush entry") + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C_flush_single_entry() failed.") #if H5C_DO_SANITY_CHECKS if ( entry_was_dirty ) @@ -1755,6 +1773,9 @@ H5C_insert_entry(H5F_t * f, H5AC_ring_t ring = H5C_RING_UNDEFINED; hbool_t insert_pinned; hbool_t flush_last; +#ifdef H5_HAVE_PARALLEL + hbool_t coll_access = FALSE; /* whether access to the cache entry is done collectively */ +#endif /* H5_HAVE_PARALLEL */ hbool_t set_flush_marker; hbool_t write_permitted = TRUE; size_t empty_space; @@ -1892,6 +1913,9 @@ H5C_insert_entry(H5F_t * f, entry_ptr->aux_next = NULL; entry_ptr->aux_prev = NULL; + entry_ptr->coll_next = NULL; + entry_ptr->coll_prev = NULL; + H5C__RESET_CACHE_ENTRY_STATS(entry_ptr) if ( ( cache_ptr->flash_size_increase_possible ) && @@ -1993,6 +2017,45 @@ H5C_insert_entry(H5F_t * f, H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr) +#ifdef H5_HAVE_PARALLEL + /* Get the dataset transfer property list */ + if(NULL == (dxpl = (H5P_genplist_t *)H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list"); + + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE); + + if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) { + H5P_coll_md_read_flag_t prop_value; + + /* Get the property value */ + if(H5P_get(dxpl, H5_COLL_MD_READ_FLAG_NAME, &prop_value) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "Can't get collective metadata access flag") + coll_access = (H5P_USER_TRUE == prop_value ? 
TRUE : FALSE); + } /* end if */ + } /* end if */ + + entry_ptr->coll_access = coll_access; + if(coll_access) { + H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL) + + /* Make sure the size of the collective entries in the cache remain in check */ + if(H5P_USER_TRUE == f->coll_md_read) { + if(cache_ptr->max_cache_size*80 < cache_ptr->coll_list_size*100) { + if(H5C_clear_coll_entries(cache_ptr, 1) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.") + } /* end if */ + } /* end if */ + else { + if(cache_ptr->max_cache_size*40 < cache_ptr->coll_list_size*100) { + if(H5C_clear_coll_entries(cache_ptr, 1) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.") + } /* end if */ + } /* end else */ + } /* end if */ + entry_ptr->ind_access_while_coll = FALSE; +#endif + done: #if H5C_DO_EXTREME_SANITY_CHECKS if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || @@ -2349,6 +2412,14 @@ H5C_resize_entry(void *thing, size_t new_size) (entry_ptr->size), (new_size)) } /* end if */ +#ifdef H5_HAVE_PARALLEL + if(entry_ptr->coll_access) { + H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr->coll_list_len), \ + (cache_ptr->coll_list_size), \ + (entry_ptr->size), (new_size)) + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + /* update the hash table */ H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\ (new_size), (entry_ptr), (was_clean)); @@ -2575,6 +2646,9 @@ H5C_protect(H5F_t * f, hbool_t have_write_permitted = FALSE; hbool_t read_only = FALSE; hbool_t flush_last; +#ifdef H5_HAVE_PARALLEL + hbool_t coll_access = FALSE; /* whether access to the cache entry is done collectively */ +#endif /* H5_HAVE_PARALLEL */ hbool_t write_permitted; size_t empty_space; void * thing; @@ -2616,6 +2690,21 @@ H5C_protect(H5F_t * f, if((H5P_get(dxpl, H5AC_RING_NAME, &ring)) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "unable to query ring value") +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE); + + if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) { + H5P_coll_md_read_flag_t prop_value; + + /* get the property value */ + if(H5P_get(dxpl, H5_COLL_MD_READ_FLAG_NAME, &prop_value) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "Can't get collective metadata access flag") + coll_access = (H5P_USER_TRUE == prop_value ? TRUE : FALSE); + } /* end if */ + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + /* first check to see if the target is in cache */ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL) @@ -2627,6 +2716,62 @@ H5C_protect(H5F_t * f, if(entry_ptr->type != type) HGOTO_ERROR(H5E_CACHE, H5E_BADTYPE, NULL, "incorrect cache entry type") + /* if this is a collective metadata read, the entry is not + marked as collective, and is clean, it is possible that + other processes will not have it in its cache and will + expect a bcast of the entry from process 0. So process 0 + will bcast the entry to all other ranks. Ranks that do have + the entry in their cache still have to participate in the + bcast. 
*/ +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access && + !(entry_ptr->is_dirty) && !(entry_ptr->coll_access)) { + MPI_Comm comm; /* File MPI Communicator */ + int mpi_code; /* MPI error code */ + int buf_size; + + if(MPI_COMM_NULL == (comm = H5F_mpi_get_comm(f))) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "get_comm request failed") + + if(entry_ptr->image_ptr == NULL) { + int mpi_rank; + size_t image_size; + + if((mpi_rank = H5F_mpi_get_rank(f)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "Can't get MPI rank") + + if(entry_ptr->compressed) + image_size = entry_ptr->compressed_size; + else + image_size = entry_ptr->size; + HDassert(image_size > 0); + + if(NULL == (entry_ptr->image_ptr = H5MM_malloc(image_size + H5C_IMAGE_EXTRA_SPACE))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "memory allocation failed for on disk image buffer") +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + image_size, + H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + if(0 == mpi_rank) + if(H5C__generate_image(f, cache_ptr, entry_ptr, dxpl_id, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "Can't get Image") + } /* end if */ + + HDassert(entry_ptr->image_ptr); + + H5_CHECKED_ASSIGN(buf_size, int, entry_ptr->size, size_t); + if(MPI_SUCCESS != (mpi_code = MPI_Bcast(entry_ptr->image_ptr, buf_size, MPI_BYTE, 0, comm))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + + entry_ptr->coll_access = TRUE; + + H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, NULL) + } /* end if */ + else if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access && entry_ptr->coll_access) { + H5C__MOVE_TO_TOP_IN_COLL_LIST(cache_ptr, entry_ptr, NULL) + } /* end else-if */ +#endif /* H5_HAVE_PARALLEL */ + #if H5C_DO_TAGGING_SANITY_CHECKS { haddr_t tag = HADDR_UNDEF; @@ -2659,7 +2804,11 @@ H5C_protect(H5F_t * f, hit = FALSE; - thing = H5C_load_entry(f, dxpl_id, type, addr, udata); + thing = H5C_load_entry(f, dxpl_id, +#ifdef H5_HAVE_PARALLEL + coll_access, +#endif /* H5_HAVE_PARALLEL */ + type, addr, udata); if ( thing == NULL ) { @@ -2668,6 +2817,10 @@ H5C_protect(H5F_t * f, entry_ptr = (H5C_cache_entry_t *)thing; entry_ptr->ring = ring; +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && entry_ptr->coll_access) + H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, NULL) +#endif /* H5_HAVE_PARALLEL */ /* Apply tag to newly protected entry */ if(H5C_tag_entry(cache_ptr, entry_ptr, dxpl_id) < 0) @@ -2901,6 +3054,24 @@ H5C_protect(H5F_t * f, } } +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + /* Make sure the size of the collective entries in the cache remain in check */ + if(TRUE == coll_access) { + if(H5P_USER_TRUE == f->coll_md_read) { + if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) + if(H5C_clear_coll_entries(cache_ptr, 1) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "H5C_clear_coll_entries() failed.") + } /* end if */ + else { + if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) + if(H5C_clear_coll_entries(cache_ptr, 1) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "H5C_clear_coll_entries() failed.") + } /* end else */ + } /* end if */ + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + done: #if H5C_DO_EXTREME_SANITY_CHECKS @@ -4544,7 +4715,7 @@ H5C_unprotect(H5F_t * f, /* Delete the entry from the skip list on destroy */ flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, 
NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush entry") #if H5C_DO_SANITY_CHECKS @@ -4570,7 +4741,7 @@ H5C_unprotect(H5F_t * f, else if(test_entry_ptr != entry_ptr) HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "hash table contains multiple entries for addr?!?.") - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear entry") } #endif /* H5_HAVE_PARALLEL */ @@ -5863,7 +6034,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, cache_ptr->entries_removed_counter = 0; cache_ptr->last_entry_removed_ptr = NULL; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") if ( ( cache_ptr->entries_removed_counter > 1 ) || @@ -5875,7 +6046,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, bytes_evicted += entry_ptr->size; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0 ) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0 ) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") } @@ -5955,7 +6126,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, prev_ptr = entry_ptr->prev; if ( ! (entry_ptr->is_dirty) ) { - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush clean entry") } /* just skip the entry if it is dirty, as we can't do @@ -6803,7 +6974,7 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring, entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, entry_size_change_ptr) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, entry_size_change_ptr, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.") #if H5C_DO_SANITY_CHECKS /* entry size may have changed during the flush. @@ -6848,9 +7019,9 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring, entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG), - entry_size_change_ptr) < 0) + entry_size_change_ptr, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty entry flush destroy failed.") #if H5C_DO_SANITY_CHECKS /* entry size may have changed during the flush. 
@@ -6971,8 +7142,8 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring, entry_was_dirty = entry_ptr->is_dirty; if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, - (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG), - NULL) < 0) + (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG), + NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Entry flush destroy failed.") if(entry_was_dirty) { @@ -7342,7 +7513,7 @@ H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring, unsigned flags) entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.") #if H5C_DO_SANITY_CHECKS @@ -7387,7 +7558,7 @@ H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring, unsigned flags) flushed_entries_size += (int64_t)entry_ptr->size; entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0) + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush entry.") #if H5C_DO_SANITY_CHECKS @@ -7536,7 +7707,7 @@ done: */ herr_t H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_ptr, - unsigned flags, int64_t *entry_size_change_ptr) + unsigned flags, int64_t *entry_size_change_ptr, H5SL_t *collective_write_list) { H5C_t * cache_ptr; /* Cache for file */ hbool_t destroy; /* external flag */ @@ -7582,6 +7753,10 @@ H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_ else destroy_entry = destroy; +#ifdef H5_HAVE_PARALLEL + HDassert(FALSE == entry_ptr->coll_access); +#endif + /* we will write the entry to disk if it exists, is dirty, and if the * clear only flag is not set. 
*/ @@ -7914,6 +8089,25 @@ H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_ else image_size = entry_ptr->size; +#ifdef H5_HAVE_PARALLEL + if(collective_write_list) { + H5C_collective_write_t *item = NULL; + + if(NULL == (item = (H5C_collective_write_t *)H5MM_malloc(sizeof(H5C_collective_write_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate skip list item") + + item->length = image_size; + item->free_buf = FALSE; + item->buf = entry_ptr->image_ptr; + item->offset = entry_ptr->addr; + + if(H5SL_insert(collective_write_list, item, &item->offset) < 0) { + H5MM_free(item); + HGOTO_ERROR(H5E_HEAP, H5E_CANTINSERT, FAIL, "unable to insert skip list item") + } /* end if */ + } /* end if */ + else +#endif /* H5_HAVE_PARALLEL */ if(H5F_block_write(f, entry_ptr->type->mem_type, entry_ptr->addr, image_size, dxpl_id, entry_ptr->image_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't write image to file.") @@ -8170,6 +8364,9 @@ done: static void * H5C_load_entry(H5F_t * f, hid_t dxpl_id, +#ifdef H5_HAVE_PARALLEL + hbool_t coll_access, +#endif /* H5_HAVE_PARALLEL */ const H5C_class_t * type, haddr_t addr, void * udata) @@ -8188,6 +8385,11 @@ H5C_load_entry(H5F_t * f, H5C_cache_entry_t * entry; /* Alias for thing loaded, as cache entry */ size_t len; /* Size of image in file */ unsigned u; /* Local index variable */ +#ifdef H5_HAVE_PARALLEL + int mpi_rank = 0; /* MPI process rank */ + MPI_Comm comm = MPI_COMM_NULL; /* File MPI Communicator */ + int mpi_code; /* MPI error code */ +#endif /* H5_HAVE_PARALLEL */ void * ret_value = NULL; /* Return value */ FUNC_ENTER_NOAPI_NOINIT @@ -8332,10 +8534,37 @@ H5C_load_entry(H5F_t * f, HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); #endif /* H5C_DO_MEMORY_SANITY_CHECKS */ +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + if((mpi_rank = H5F_mpi_get_rank(f)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "Can't get MPI rank") + if((comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "get_comm request failed") + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + /* Get the on-disk entry image */ - if(0 == (type->flags & H5C__CLASS_SKIP_READS)) - if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*") + if(0 == (type->flags & H5C__CLASS_SKIP_READS)) { +#ifdef H5_HAVE_PARALLEL + if(!coll_access || 0 == mpi_rank) { +#endif /* H5_HAVE_PARALLEL */ + if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*") + +#ifdef H5_HAVE_PARALLEL + } /* end if */ + /* if the collective metadata read optimization is turned on, + bcast the metadata read from process 0 to all ranks in the file + communicator */ + if(coll_access) { + int buf_size; + + H5_CHECKED_ASSIGN(buf_size, int, len, size_t); + if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + } /* end if */ /* Deserialize the on-disk image into the native memory form */ if(NULL == (thing = type->deserialize(image, len, udata, &dirty))) @@ -8438,11 +8667,28 @@ H5C_load_entry(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "free_icr callback failed") +#ifdef H5_HAVE_PARALLEL + if(!coll_access || 0 == mpi_rank) { +#endif /* H5_HAVE_PARALLEL */ /* Go get the on-disk image again */ if(H5F_block_read(f, 
type->mem_type, addr, new_len, dxpl_id, image) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't read image") +#ifdef H5_HAVE_PARALLEL + } + /* if the collective metadata read optimization is turned on, + bcast the metadata read from process 0 to all ranks in the file + communicator */ + if(coll_access) { + int buf_size; + + H5_CHECKED_ASSIGN(buf_size, int, new_len, size_t); + if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ + /* Deserialize on-disk image into native memory form again */ if(NULL == (thing = type->deserialize(image, new_len, udata, &dirty))) @@ -8531,6 +8777,8 @@ H5C_load_entry(H5F_t * f, #ifdef H5_HAVE_PARALLEL entry->clear_on_unprotect = FALSE; entry->flush_immediately = FALSE; + entry->coll_access = coll_access; + entry->ind_access_while_coll = FALSE; #endif /* H5_HAVE_PARALLEL */ entry->flush_in_progress = FALSE; entry->destroy_in_progress = FALSE; @@ -8551,6 +8799,9 @@ H5C_load_entry(H5F_t * f, entry->aux_next = NULL; entry->aux_prev = NULL; + entry->coll_next = NULL; + entry->coll_prev = NULL; + H5C__RESET_CACHE_ENTRY_STATS(entry); ret_value = thing; @@ -8743,7 +8994,14 @@ H5C_make_space_in_cache(H5F_t * f, cache_ptr->entries_removed_counter = 0; cache_ptr->last_entry_removed_ptr = NULL; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0) +#ifdef H5_HAVE_PARALLEL + if(TRUE == entry_ptr->coll_access) { + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + } /* end if */ +#endif + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") if ( ( cache_ptr->entries_removed_counter > 1 ) || @@ -8751,14 +9009,23 @@ H5C_make_space_in_cache(H5F_t * f, restart_scan = TRUE; - } else if ( (cache_ptr->index_size + space_needed) - > - cache_ptr->max_cache_size ) { + } else if ( (cache_ptr->index_size + space_needed) > cache_ptr->max_cache_size +#ifdef H5_HAVE_PARALLEL + && !(entry_ptr->coll_access) +#endif /* H5_HAVE_PARALLEL */ + ) { #if H5C_COLLECT_CACHE_STATS cache_ptr->entries_scanned_to_make_space++; #endif /* H5C_COLLECT_CACHE_STATS */ - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) +#ifdef H5_HAVE_PARALLEL + if(TRUE == entry_ptr->coll_access) { + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + } /* end if */ +#endif + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") } else { /* We have enough space so don't flush clean entry. 
*/ @@ -8894,8 +9161,14 @@ H5C_make_space_in_cache(H5F_t * f, prev_ptr = entry_ptr->aux_prev; - if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") +#ifdef H5_HAVE_PARALLEL + if(!(entry_ptr->coll_access)) { +#endif /* H5_HAVE_PARALLEL */ + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") +#ifdef H5_HAVE_PARALLEL + } /* end if */ +#endif /* H5_HAVE_PARALLEL */ /* we are scanning the clean LRU, so the serialize function * will not be called on any entry -- thus there is no @@ -9934,3 +10207,246 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5C_get_entry_ring() */ +static herr_t +H5C__generate_image(H5F_t *f, H5C_t * cache_ptr, H5C_cache_entry_t *entry_ptr, + hid_t dxpl_id, int64_t *entry_size_change_ptr) +{ + haddr_t new_addr = HADDR_UNDEF; + haddr_t old_addr = HADDR_UNDEF; + size_t new_len = 0; + size_t new_compressed_len = 0; + unsigned serialize_flags = H5C__SERIALIZE_NO_FLAGS_SET; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(!entry_ptr->image_up_to_date); + + /* reset cache_ptr->slist_changed so we can detect slist + * modifications in the pre_serialize call. + */ + cache_ptr->slist_changed = FALSE; + + /* make note of the entry's current address */ + old_addr = entry_ptr->addr; + + /* Call client's pre-serialize callback, if there's one */ + if ( ( entry_ptr->type->pre_serialize != NULL ) && + ( (entry_ptr->type->pre_serialize)(f, dxpl_id, + (void *)entry_ptr, + entry_ptr->addr, + entry_ptr->size, + entry_ptr->compressed_size, + &new_addr, &new_len, + &new_compressed_len, + &serialize_flags) < 0 ) ) { + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unable to pre-serialize entry"); + } + + /* set cache_ptr->slist_change_in_pre_serialize if the + * slist was modified. + */ + if ( cache_ptr->slist_changed ) + cache_ptr->slist_change_in_pre_serialize = TRUE; + + /* Check for any flags set in the pre-serialize callback */ + if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) { + /* Check for unexpected flags from serialize callback */ + if ( serialize_flags & ~(H5C__SERIALIZE_RESIZED_FLAG | + H5C__SERIALIZE_MOVED_FLAG | + H5C__SERIALIZE_COMPRESSED_FLAG)) { + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unknown serialize flag(s)"); + } + +#ifdef H5_HAVE_PARALLEL + if ( cache_ptr->aux_ptr != NULL ) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "resize/move in serialize occured in parallel case."); +#endif + + /* Resize the buffer if required */ + if ( ( ( ! 
entry_ptr->compressed ) && + ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) ) || + ( ( entry_ptr->compressed ) && + ( serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG ) ) ) { + size_t new_image_size; + + if ( entry_ptr->compressed ) + new_image_size = new_compressed_len; + else + new_image_size = new_len; + + HDassert(new_image_size > 0); + + /* Release the current image */ + if ( entry_ptr->image_ptr ) { + entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr); + } + + /* Allocate a new image buffer */ + entry_ptr->image_ptr = + H5MM_malloc(new_image_size + H5C_IMAGE_EXTRA_SPACE); + + if ( NULL == entry_ptr->image_ptr ) + { + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for on disk image buffer"); + } + +#if H5C_DO_MEMORY_SANITY_CHECKS + + HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + new_image_size, + H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE); + +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + } /* end if */ + + /* If required, update the entry and the cache data structures + * for a resize. + */ + if ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) { + + H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, \ + entry_ptr, new_len); + + /* update the hash table for the size change*/ + H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, \ + entry_ptr->size, \ + new_len, entry_ptr, \ + !(entry_ptr->is_dirty)); + + /* The entry can't be protected since we are + * in the process of flushing it. Thus we must + * update the replacement policy data + * structures for the size change. The macro + * deals with the pinned case. + */ + H5C__UPDATE_RP_FOR_SIZE_CHANGE(cache_ptr, entry_ptr, new_len); + + /* as we haven't updated the cache data structures for + * for the flush or flush destroy yet, the entry should + * be in the slist. Thus update it for the size change. + */ + HDassert(entry_ptr->in_slist); + H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, \ + new_len); + + /* if defined, update *entry_size_change_ptr for the + * change in entry size. + */ + if ( entry_size_change_ptr != NULL ) + { + *entry_size_change_ptr = (int64_t)new_len; + *entry_size_change_ptr -= (int64_t)(entry_ptr->size); + } + + /* finally, update the entry for its new size */ + entry_ptr->size = new_len; + } /* end if */ + + /* If required, udate the entry and the cache data structures + * for a move + */ + if(serialize_flags & H5C__SERIALIZE_MOVED_FLAG) { +#if H5C_DO_SANITY_CHECKS + int64_t saved_slist_len_increase; + int64_t saved_slist_size_increase; +#endif /* H5C_DO_SANITY_CHECKS */ + + H5C__UPDATE_STATS_FOR_MOVE(cache_ptr, entry_ptr); + + if ( entry_ptr->addr == old_addr ) { + /* we must update cache data structures for the + * change in address. + */ + + /* delete the entry from the hash table and the slist */ + H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr); + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr); + + /* update the entry for its new address */ + entry_ptr->addr = new_addr; + + /* and then reinsert in the index and slist */ + H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL); + +#if H5C_DO_SANITY_CHECKS + /* save cache_ptr->slist_len_increase and + * cache_ptr->slist_size_increase before the + * reinsertion into the slist, and restore + * them afterwards to avoid skewing our sanity + * checking. 
+ */ + saved_slist_len_increase = cache_ptr->slist_len_increase; + saved_slist_size_increase = cache_ptr->slist_size_increase; +#endif /* H5C_DO_SANITY_CHECKS */ + + H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL); + +#if H5C_DO_SANITY_CHECKS + cache_ptr->slist_len_increase = saved_slist_len_increase; + cache_ptr->slist_size_increase = saved_slist_size_increase; +#endif /* H5C_DO_SANITY_CHECKS */ + } + else { + HDassert(entry_ptr->addr == new_addr); + } + } /* end if */ + + if ( serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG ) { + /* just save the new compressed entry size in + * entry_ptr->compressed_size. We don't need to + * do more, as compressed size is only used for I/O. + */ + HDassert(entry_ptr->compressed); + entry_ptr->compressed_size = new_compressed_len; + } + } /* end if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) */ + + /* Serialize object into buffer */ + { + size_t image_len; + + if ( entry_ptr->compressed ) + image_len = entry_ptr->compressed_size; + else + image_len = entry_ptr->size; + + /* reset cache_ptr->slist_changed so we can detect slist + * modifications in the serialize call. + */ + cache_ptr->slist_changed = FALSE; + + + if ( entry_ptr->type->serialize(f, entry_ptr->image_ptr, + image_len, + (void *)entry_ptr) < 0) { + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unable to serialize entry"); + } + + /* set cache_ptr->slist_change_in_serialize if the + * slist was modified. + */ + if ( cache_ptr->slist_changed ) + cache_ptr->slist_change_in_pre_serialize = TRUE; + +#if H5C_DO_MEMORY_SANITY_CHECKS + + HDassert(0 == HDmemcmp(((uint8_t *)entry_ptr->image_ptr) + + image_len, + H5C_IMAGE_SANITY_VALUE, + H5C_IMAGE_EXTRA_SPACE)); + +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + entry_ptr->image_up_to_date = TRUE; + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C__generate_image */ diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c index 6e8d94c..e17aacd 100644 --- a/src/H5Cmpio.c +++ b/src/H5Cmpio.c @@ -39,11 +39,14 @@ #include "H5private.h" /* Generic Functions */ #include "H5ACprivate.h" /* Metadata cache */ #include "H5Cpkg.h" /* Cache */ +#include "H5Dprivate.h" /* Datasets */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ +#include "H5FDprivate.h" /* File drivers */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ - +#include "H5Pprivate.h" /* Property lists */ +#include "H5SLprivate.h" /* Skip lists */ #ifdef H5_HAVE_PARALLEL @@ -221,6 +224,7 @@ H5C_apply_candidate_list(H5F_t * f, H5C_cache_entry_t * entry_ptr = NULL; H5C_cache_entry_t * flush_ptr = NULL; H5C_cache_entry_t * delayed_ptr = NULL; + H5SL_t * collective_write_list = NULL; #if H5C_DO_SANITY_CHECKS haddr_t last_addr; #endif /* H5C_DO_SANITY_CHECKS */ @@ -253,6 +257,12 @@ H5C_apply_candidate_list(H5F_t * f, HDfprintf(stdout, "%s", tbl_buf); #endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ + if(f->coll_md_write) { + /* Create skip list of entries for collective write */ + if(NULL == (collective_write_list = H5SL_create(H5SL_TYPE_HADDR, NULL))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTCREATE, FAIL, "can't create skip list for entries") + } + n = num_candidates / mpi_size; m = num_candidates % mpi_size; HDassert(n >= 0); @@ -357,6 +367,16 @@ H5C_apply_candidate_list(H5F_t * f, entries_to_clear++; entry_ptr->clear_on_unprotect = TRUE; } /* end else */ + + /* Entries marked as collectively accessed and are in the + candidate list to clear from the cache have to be + removed from the coll list. 
This is OK since the + candidate list is collective and uniform across all + ranks. */ + if(TRUE == entry_ptr->coll_access) { + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + } } /* end else */ } /* end for */ @@ -433,7 +453,9 @@ H5C_apply_candidate_list(H5F_t * f, * will not call either the pre_serialize or serialize callbacks. */ - if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, + H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") } /* end if */ @@ -478,7 +500,8 @@ H5C_apply_candidate_list(H5F_t * f, cache_ptr->entries_removed_counter = 0; cache_ptr->last_entry_removed_ptr = NULL; - if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, collective_write_list) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.") if ( ( cache_ptr->entries_removed_counter > 1 ) || @@ -636,7 +659,9 @@ H5C_apply_candidate_list(H5F_t * f, (long long)clear_ptr->addr); #endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, + H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") } /* end else-if */ @@ -652,7 +677,8 @@ H5C_apply_candidate_list(H5F_t * f, (long long)flush_ptr->addr); #endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, collective_write_list) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") } /* end else-if */ } /* end if */ @@ -686,20 +712,34 @@ H5C_apply_candidate_list(H5F_t * f, if (delayed_ptr) { if (delayed_ptr->clear_on_unprotect) { + if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG, + NULL, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.") + entry_ptr->clear_on_unprotect = FALSE; entries_cleared++; } else if (delayed_ptr->flush_immediately) { + if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, collective_write_list) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.") + entry_ptr->flush_immediately = FALSE; entries_flushed++; } /* end if */ - if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.") - entries_flushed_collectively++; entries_flushed_or_cleared_last++; } /* end if */ + if(f->coll_md_write) { + HDassert(collective_write_list); + + /* Write collective list */ + if(H5C_collective_write(f, + dxpl_id, + collective_write_list) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't write metadata collectively") + } /* ====================================================================== * * Finished flushing everything. 
* * ====================================================================== */ @@ -719,6 +759,10 @@ done: if(candidate_assignment_table != NULL) candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table); + if(collective_write_list) + if(H5SL_destroy(collective_write_list, H5C_collective_write_free, NULL) < 0) + HDONE_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "failed to destroy skip list") + FUNC_LEAVE_NOAPI(ret_value) } /* H5C_apply_candidate_list() */ @@ -1082,6 +1126,14 @@ H5C_mark_entries_as_clean(H5F_t * f, * scan the LRU list shortly, and clear all those entries * not currently protected. */ + + /* Make sure first that we clear the collective flag from + it so it can be cleared */ + if(TRUE == entry_ptr->coll_access) { + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + } + entry_ptr->clear_on_unprotect = TRUE; #if H5C_DO_SANITY_CHECKS if ( entry_ptr->is_protected ) { @@ -1142,7 +1194,9 @@ H5C_mark_entries_as_clean(H5F_t * f, entry_ptr = entry_ptr->prev; entries_cleared++; - if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, + H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, NULL) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") } else { @@ -1170,7 +1224,9 @@ H5C_mark_entries_as_clean(H5F_t * f, entry_ptr = entry_ptr->next; entries_cleared++; - if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0 ) + if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, + H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, + NULL, NULL) < 0 ) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") } else { @@ -1215,5 +1271,205 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* H5C_mark_entries_as_clean() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5C_clear_coll_entries + * + * Purpose: Clear half or the entire list of collective entries and + * mark them as independent. + * + * Return: FAIL if error is detected, SUCCEED otherwise. + * + * Programmer: Mohamad Chaarawi + * April, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5C_clear_coll_entries(H5C_t * cache_ptr, hbool_t partial) +{ + int32_t list_len, coll_entries_cleared = 0; + H5C_cache_entry_t * entry_ptr = NULL; + H5C_cache_entry_t * prev_ptr; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + entry_ptr = cache_ptr->coll_tail_ptr; + list_len = cache_ptr->coll_list_len; + + while(entry_ptr && (coll_entries_cleared < (partial ? 
list_len/2 : list_len))) { + prev_ptr = entry_ptr->coll_prev; + + HDassert(entry_ptr->coll_access); + + entry_ptr->coll_access = FALSE; + H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL) + coll_entries_cleared ++; + + entry_ptr = prev_ptr; + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* H5C_clear_coll_entries */ + +herr_t +H5C_collective_write(H5F_t *f, + hid_t dxpl_id, + H5SL_t *collective_write_list) +{ + H5P_genplist_t *plist = NULL; + H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; + H5FD_mpio_xfer_t orig_xfer_mode; + H5SL_node_t *node; + H5C_collective_write_t *item; + int count; + void *base_buf; + int *length_array = NULL; + MPI_Aint *buf_array = NULL; + MPI_Aint *offset_array = NULL; + MPI_Datatype btype; + MPI_Datatype ftype; + hbool_t btype_created = FALSE; + hbool_t ftype_created = FALSE; + int mpi_code; + int i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + count = (int)H5SL_count(collective_write_list); + + if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + + if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") + + if(count > 0) { + if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") + + /* Allocate arrays */ + if(NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective write table length array") + if(NULL == (buf_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective buf table length array") + if(NULL == (offset_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective offset table length array") + + /* Fill arrays */ + node = H5SL_first(collective_write_list); + HDassert(node); + if(NULL == (item = (H5C_collective_write_t *)H5SL_item(node))) + HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") + + length_array[0] = (int)item->length; + base_buf = item->buf; + buf_array[0] = (MPI_Aint)0; + offset_array[0] = (MPI_Aint)item->offset; + + node = H5SL_next(node); + i = 1; + while(node) { + if(NULL == (item = (H5C_collective_write_t *)H5SL_item(node))) + HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") + + length_array[i] = (int)item->length; + buf_array[i] = (MPI_Aint)item->buf - (MPI_Aint)base_buf; + offset_array[i] = (MPI_Aint)item->offset; + node = H5SL_next(node); + i++; + } /* end while */ + + /* Create memory mpi type */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + btype_created = TRUE; + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + /* Create file mpi type */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + ftype_created = TRUE; + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + /* Pass buf type, file type to the file driver. 
*/ + if(H5FD_mpi_setup_collective(dxpl_id, &btype, &ftype) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") + + /* Write data */ + if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, dxpl_id, base_buf) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively") + } /* end if */ + else { + MPI_Status mpi_stat; + MPI_File mpi_fh_p; + MPI_File mpi_fh; + + if(H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file handle") + mpi_fh = *(MPI_File*)mpi_fh_p; + + /* just to match up with the 1st MPI_File_set_view from H5FD_mpio_write() */ + if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, + MPI_BYTE, "native", MPI_INFO_NULL))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */ + HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); + if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0, + MPI_BYTE, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) + + /* just to match up with the 2nd MPI_File_set_view (reset) in H5FD_mpio_write() */ + if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, + MPI_BYTE, "native", MPI_INFO_NULL))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + } /* end else */ + +done: + /* Free arrays */ + length_array = (int *)H5MM_xfree(length_array); + buf_array = (MPI_Aint *)H5MM_xfree(buf_array); + offset_array = (MPI_Aint *)H5MM_xfree(offset_array); + + /* Free MPI Types */ + if(btype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&btype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + if(ftype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&ftype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + + /* Reset dxpl */ + if(orig_xfer_mode != H5FD_MPIO_COLLECTIVE) { + HDassert(plist); + if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0) + HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") + } /* end if */ + + FUNC_LEAVE_NOAPI(ret_value); +} /* end H5C_collective_write() */ + +herr_t +H5C_collective_write_free(void *_item, void H5_ATTR_UNUSED *key, void H5_ATTR_UNUSED *op_data) +{ + H5C_collective_write_t *item = (H5C_collective_write_t *)_item; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + HDassert(item); + + if(item->free_buf) + item->buf = H5MM_xfree(item->buf); + /*!FIXME change to use free list for items */ + H5MM_free(item); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5C_collective_write_free() */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 9508b98..2f4d137 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -2542,7 +2542,16 @@ if ( ( (cache_ptr)->index_size != \ HDassert( ((entry_ptr)->ro_ref_count) == 0 ); \ HDassert( (entry_ptr)->size > 0 ); \ HDassert( new_size > 0 ); \ - \ + \ + if ( (entry_ptr)->coll_access ) { \ + \ + H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr)->coll_list_len, \ + (cache_ptr)->coll_list_size, \ + (entry_ptr)->size, \ + (new_size)); \ + \ + } \ + \ if ( (entry_ptr)->is_pinned ) { \ \ H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr)->pel_len, \ @@ -2892,6 +2901,247 @@ if ( ( (cache_ptr)->index_size != \ #endif /* H5C_MAINTAIN_CLEAN_AND_DIRTY_LRU_LISTS */ +#ifdef H5_HAVE_PARALLEL + +#if H5C_DO_SANITY_CHECKS + +#define H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \ +if ( ( (hd_ptr) == NULL ) || \ + ( 
(tail_ptr) == NULL ) || \ + ( (entry_ptr) == NULL ) || \ + ( (len) <= 0 ) || \ + ( (Size) < (entry_ptr)->size ) || \ + ( ( (Size) == (entry_ptr)->size ) && ( ! ( (len) == 1 ) ) ) || \ + ( ( (entry_ptr)->coll_prev == NULL ) && ( (hd_ptr) != (entry_ptr) ) ) || \ + ( ( (entry_ptr)->coll_next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \ + ( ( (len) == 1 ) && \ + ( ! ( ( (hd_ptr) == (entry_ptr) ) && ( (tail_ptr) == (entry_ptr) ) && \ + ( (entry_ptr)->coll_next == NULL ) && \ + ( (entry_ptr)->coll_prev == NULL ) && \ + ( (Size) == (entry_ptr)->size ) \ + ) \ + ) \ + ) \ + ) { \ + HDassert(0 && "coll DLL pre remove SC failed"); \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "coll DLL pre remove SC failed") \ +} + +#define H5C__COLL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \ +if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \ + ( (head_ptr) != (tail_ptr) ) \ + ) || \ + ( (len) < 0 ) || \ + ( (Size) < 0 ) || \ + ( ( (len) == 1 ) && \ + ( ( (head_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \ + ( (head_ptr) == NULL ) || ( (head_ptr)->size != (Size) ) \ + ) \ + ) || \ + ( ( (len) >= 1 ) && \ + ( ( (head_ptr) == NULL ) || ( (head_ptr)->coll_prev != NULL ) || \ + ( (tail_ptr) == NULL ) || ( (tail_ptr)->coll_next != NULL ) \ + ) \ + ) \ + ) { \ + HDassert(0 && "COLL DLL sanity check failed"); \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "COLL DLL sanity check failed") \ +} + +#define H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \ +if ( ( (entry_ptr) == NULL ) || \ + ( (entry_ptr)->coll_next != NULL ) || \ + ( (entry_ptr)->coll_prev != NULL ) || \ + ( ( ( (hd_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \ + ( (hd_ptr) != (tail_ptr) ) \ + ) || \ + ( (len) < 0 ) || \ + ( ( (len) == 1 ) && \ + ( ( (hd_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \ + ( (hd_ptr) == NULL ) || ( (hd_ptr)->size != (Size) ) \ + ) \ + ) || \ + ( ( (len) >= 1 ) && \ + ( ( (hd_ptr) == NULL ) || ( (hd_ptr)->coll_prev != NULL ) || \ + ( (tail_ptr) == NULL ) || ( (tail_ptr)->coll_next != NULL ) \ + ) \ + ) \ + ) { \ + HDassert(0 && "COLL DLL pre insert SC failed"); \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "COLL DLL pre insert SC failed") \ +} + +#else /* H5C_DO_SANITY_CHECKS */ + +#define H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) +#define H5C__COLL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) +#define H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) + +#endif /* H5C_DO_SANITY_CHECKS */ + + +#define H5C__COLL_DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \ +{ \ + H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \ + fail_val) \ + if ( (head_ptr) == NULL ) \ + { \ + (head_ptr) = (entry_ptr); \ + (tail_ptr) = (entry_ptr); \ + } \ + else \ + { \ + (tail_ptr)->coll_next = (entry_ptr); \ + (entry_ptr)->coll_prev = (tail_ptr); \ + (tail_ptr) = (entry_ptr); \ + } \ + (len)++; \ + (Size) += entry_ptr->size; \ +} /* H5C__COLL_DLL_APPEND() */ + +#define H5C__COLL_DLL_PREPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \ +{ \ + H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)\ + if ( (head_ptr) == NULL ) \ + { \ + (head_ptr) = (entry_ptr); \ + (tail_ptr) = (entry_ptr); \ + } \ + else \ + { \ + (head_ptr)->coll_prev = (entry_ptr); \ + (entry_ptr)->coll_next = (head_ptr); \ + (head_ptr) = (entry_ptr); \ + } \ + (len)++; \ + (Size) += entry_ptr->size; \ +} /* H5C__COLL_DLL_PREPEND() */ + +#define H5C__COLL_DLL_REMOVE(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \ +{ \ + 
H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)\ + { \ + if ( (head_ptr) == (entry_ptr) ) \ + { \ + (head_ptr) = (entry_ptr)->coll_next; \ + if ( (head_ptr) != NULL ) \ + (head_ptr)->coll_prev = NULL; \ + } \ + else \ + { \ + (entry_ptr)->coll_prev->coll_next = (entry_ptr)->coll_next; \ + } \ + if ( (tail_ptr) == (entry_ptr) ) \ + { \ + (tail_ptr) = (entry_ptr)->coll_prev; \ + if ( (tail_ptr) != NULL ) \ + (tail_ptr)->coll_next = NULL; \ + } \ + else \ + (entry_ptr)->coll_next->coll_prev = (entry_ptr)->coll_prev; \ + entry_ptr->coll_next = NULL; \ + entry_ptr->coll_prev = NULL; \ + (len)--; \ + (Size) -= entry_ptr->size; \ + } \ +} /* H5C__COLL_DLL_REMOVE() */ + + +/*------------------------------------------------------------------------- + * + * Macro: H5C__INSERT_IN_COLL_LIST + * + * Purpose: Insert entry into collective entries list + * + * Return: N/A + * + * Programmer: Mohamad Chaarawi + * + *------------------------------------------------------------------------- + */ + +#define H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, fail_val) \ +{ \ + HDassert( (cache_ptr) ); \ + HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \ + HDassert( (entry_ptr) ); \ + \ + /* insert the entry at the head of the list. */ \ + \ + H5C__COLL_DLL_PREPEND((entry_ptr), (cache_ptr)->coll_head_ptr, \ + (cache_ptr)->coll_tail_ptr, \ + (cache_ptr)->coll_list_len, \ + (cache_ptr)->coll_list_size, \ + (fail_val)) \ + \ +} /* H5C__INSERT_IN_COLL_LIST */ + + +/*------------------------------------------------------------------------- + * + * Macro: H5C__REMOVE_FROM_COLL_LIST + * + * Purpose: Remove entry from collective entries list + * + * Return: N/A + * + * Programmer: Mohamad Chaarawi + * + *------------------------------------------------------------------------- + */ + +#define H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, fail_val) \ +{ \ + HDassert( (cache_ptr) ); \ + HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \ + HDassert( (entry_ptr) ); \ + \ + /* remove the entry from the list. */ \ + \ + H5C__COLL_DLL_REMOVE((entry_ptr), (cache_ptr)->coll_head_ptr, \ + (cache_ptr)->coll_tail_ptr, \ + (cache_ptr)->coll_list_len, \ + (cache_ptr)->coll_list_size, \ + (fail_val)) \ + \ +} /* H5C__REMOVE_FROM_COLL_LIST */ + + +/*------------------------------------------------------------------------- + * + * Macro: H5C__MOVE_TO_TOP_IN_COLL_LIST + * + * Purpose: Update entry position in collective entries list + * + * Return: N/A + * + * Programmer: Mohamad Chaarawi + * + *------------------------------------------------------------------------- + */ + +#define H5C__MOVE_TO_TOP_IN_COLL_LIST(cache_ptr, entry_ptr, fail_val) \ +{ \ + HDassert( (cache_ptr) ); \ + HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \ + HDassert( (entry_ptr) ); \ + \ + /* Remove entry and insert at the head of the list. 
*/ \ + H5C__COLL_DLL_REMOVE((entry_ptr), (cache_ptr)->coll_head_ptr, \ + (cache_ptr)->coll_tail_ptr, \ + (cache_ptr)->coll_list_len, \ + (cache_ptr)->coll_list_size, \ + (fail_val)) \ + \ + H5C__COLL_DLL_PREPEND((entry_ptr), (cache_ptr)->coll_head_ptr, \ + (cache_ptr)->coll_tail_ptr, \ + (cache_ptr)->coll_list_len, \ + (cache_ptr)->coll_list_size, \ + (fail_val)) \ + \ +} /* H5C__MOVE_TO_TOP_IN_COLL_LIST */ +#endif /* H5_HAVE_PARALLEL */ + /****************************/ /* Package Private Typedefs */ @@ -3885,6 +4135,13 @@ struct H5C_t { H5C_cache_entry_t * dLRU_head_ptr; H5C_cache_entry_t * dLRU_tail_ptr; +#ifdef H5_HAVE_PARALLEL + int32_t coll_list_len; + size_t coll_list_size; + H5C_cache_entry_t * coll_head_ptr; + H5C_cache_entry_t * coll_tail_ptr; +#endif /* H5_HAVE_PARALLEL */ + /* Fields for automatic cache size adjustment */ hbool_t size_increase_possible; hbool_t flash_size_increase_possible; @@ -3988,6 +4245,15 @@ struct H5C_t { #endif /* NDEBUG */ }; +#ifdef H5_HAVE_PARALLEL +typedef struct H5C_collective_write_t { + size_t length; + hbool_t free_buf; + void *buf; + haddr_t offset; +} H5C_collective_write_t; +#endif /* H5_HAVE_PARALLEL */ + /*****************************/ /* Package Private Variables */ /*****************************/ @@ -3997,7 +4263,11 @@ struct H5C_t { /* Package Private Prototypes */ /******************************/ H5_DLL herr_t H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, - H5C_cache_entry_t *entry_ptr, unsigned flags, int64_t *entry_size_change_ptr); + H5C_cache_entry_t *entry_ptr, unsigned flags, int64_t *entry_size_change_ptr, H5SL_t *collective_write_list); +#ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5C_collective_write(H5F_t *f, hid_t dxpl_id, H5SL_t *collective_write_list); +H5_DLL herr_t H5C_collective_write_free(void *_item, void *key, void *op_data); +#endif /* H5_HAVE_PARALLEL */ #endif /* _H5Cpkg_H */ diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index da9e6f5..5ad026b 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -1608,6 +1608,8 @@ typedef struct H5C_cache_entry_t { #ifdef H5_HAVE_PARALLEL hbool_t clear_on_unprotect; hbool_t flush_immediately; + hbool_t coll_access; + hbool_t ind_access_while_coll; #endif /* H5_HAVE_PARALLEL */ hbool_t flush_in_progress; hbool_t destroy_in_progress; @@ -1631,6 +1633,8 @@ typedef struct H5C_cache_entry_t { struct H5C_cache_entry_t * prev; struct H5C_cache_entry_t * aux_next; struct H5C_cache_entry_t * aux_prev; + struct H5C_cache_entry_t * coll_next; + struct H5C_cache_entry_t * coll_prev; #if H5C_COLLECT_CACHE_ENTRY_STATS /* cache entry stats fields */ @@ -1996,6 +2000,7 @@ H5_DLL herr_t H5C_apply_candidate_list(H5F_t *f, hid_t dxpl_id, int mpi_rank, int mpi_size); H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr); H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr); +H5_DLL herr_t H5C_clear_coll_entries(H5C_t * cache_ptr, hbool_t partial); H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len, haddr_t *ce_array_ptr); #endif /* H5_HAVE_PARALLEL */ @@ -910,7 +910,7 @@ H5Dset_extent(hid_t dset_id, const hsize_t size[]) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no size specified") /* Private function */ - if(H5D__set_extent(dset, size, H5AC_dxpl_id) < 0) + if(H5D__set_extent(dset, size, H5AC_coll_read_dxpl_id) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set extend dataset") done: diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 5657e6e..37e48ba 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ 
-45,7 +45,7 @@ /****************/ #include "H5Dmodule.h" /* This source code file is part of the H5D module */ - +#define H5F_FRIEND /*suppress error about including H5Fpkg */ /***********/ /* Headers */ @@ -56,6 +56,7 @@ #endif /* H5_HAVE_PARALLEL */ #include "H5Dpkg.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ +#include "H5Fpkg.h" /* File functions */ #include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ @@ -2650,6 +2651,9 @@ H5D__chunk_lookup(const H5D_t *dset, hid_t dxpl_id, const hsize_t *scaled, /* Check for cached information */ if(!H5D__chunk_cinfo_cache_found(&dset->shared->cache.chunk.last, udata)) { H5D_chk_idx_info_t idx_info; /* Chunked index info */ +#ifdef H5_HAVE_PARALLEL + H5P_coll_md_read_flag_t temp_flag; /* temp flag to hold the coll metadata read setting */ +#endif /* H5_HAVE_PARALLEL */ /* Compose chunked index info struct */ idx_info.f = dset->oloc.file; @@ -2658,10 +2662,27 @@ H5D__chunk_lookup(const H5D_t *dset, hid_t dxpl_id, const hsize_t *scaled, idx_info.layout = &dset->shared->layout.u.chunk; idx_info.storage = &dset->shared->layout.storage.u.chunk; +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) { + /* disable collective metadata read for chunk indexes + as it is highly unlikely that users would read the + same chunks from all processes. MSC - might turn on + for root node? */ + temp_flag = idx_info.f->coll_md_read; + idx_info.f->coll_md_read = H5P_FORCE_FALSE; + } +#endif /* H5_HAVE_PARALLEL */ + /* Go get the chunk information */ if((dset->shared->layout.storage.u.chunk.ops->get_addr)(&idx_info, udata) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't query chunk address") +#ifdef H5_HAVE_PARALLEL + if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) { + idx_info.f->coll_md_read = temp_flag; + } +#endif /* H5_HAVE_PARALLEL */ + /* Cache the information retrieved */ H5D__chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata); } /* end if */ diff --git a/src/H5Ddeprec.c b/src/H5Ddeprec.c index c5d6929..f0f4472 100644 --- a/src/H5Ddeprec.c +++ b/src/H5Ddeprec.c @@ -141,7 +141,8 @@ H5Dcreate1(hid_t loc_id, const char *name, hid_t type_id, hid_t space_id, HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not dataset create property list ID") /* Build and open the new dataset */ - if(NULL == (dset = H5D__create_named(&loc, name, type_id, space, H5P_LINK_CREATE_DEFAULT, dcpl_id, H5P_DATASET_ACCESS_DEFAULT, H5AC_dxpl_id))) + if(NULL == (dset = H5D__create_named(&loc, name, type_id, space, H5P_LINK_CREATE_DEFAULT, + dcpl_id, H5P_DATASET_ACCESS_DEFAULT, H5AC_dxpl_id))) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create dataset") /* Register the new dataset to get an ID for it */ @@ -472,6 +472,7 @@ H5Fcreate(const char *filename, unsigned flags, hid_t fcpl_id, hid_t fapl_id) if(TRUE != H5P_isa_class(fcpl_id, H5P_FILE_CREATE)) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not file create property list") + /* Verify access property list and get correct dxpl */ if(H5P_verify_apl_and_dxpl(&fapl_id, H5P_CLS_FACC, &dxpl_id, H5I_INVALID_HID, TRUE) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "can't set access and transfer property lists") diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index bbd3eca..ed70e20 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -1696,6 +1696,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, int size_i; hbool_t use_view_this_time = FALSE; H5P_genplist_t *plist = NULL; /* 
Property list pointer */ + H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */ herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI_NOINIT @@ -1726,57 +1727,42 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n", (long)mpi_off, size_i); #endif - if(type == H5FD_MEM_DRAW) { - H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */ + /* Obtain the data transfer properties */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") - /* Obtain the data transfer properties */ - if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id))) - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") + /* get the transfer mode from the dxpl */ + if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") - /* get the transfer mode from the dxpl */ - if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + /* + * Set up for a fancy xfer using complex types, or single byte block. We + * wouldn't need to rely on the use_view field if MPI semantics allowed + * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which + * could mean "use MPI_BYTE" by convention). + */ + if(xfer_mode == H5FD_MPIO_COLLECTIVE) { + MPI_Datatype file_type; - /* - * Set up for a fancy xfer using complex types, or single byte block. We - * wouldn't need to rely on the use_view field if MPI semantics allowed - * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which - * could mean "use MPI_BYTE" by convention). - */ - if(xfer_mode == H5FD_MPIO_COLLECTIVE) { - MPI_Datatype file_type; + /* Remember that views are used */ + use_view_this_time = TRUE; - /* Remember that views are used */ - use_view_this_time = TRUE; + /* prepare for a full-blown xfer using btype, ftype, and disp */ + if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") + if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - /* prepare for a full-blown xfer using btype, ftype, and disp */ - if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - - /* - * Set the file view when we are using MPI derived types - */ - if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + /* + * Set the file view when we are using MPI derived types + */ + if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - /* When using types, use the address as the displacement for - * MPI_File_set_view and reset the address for the read to zero - */ - mpi_off = 0; - } /* end if */ - } /* end if */ - else { -#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */ - /* Only one process can do the actual metadata write */ - if(file->mpi_rank != 
H5_PAR_META_WRITE) -#ifdef LATER - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "can't write metadata from non-zero rank") -#else /* LATER */ - HGOTO_DONE(SUCCEED) /* skip the actual write */ -#endif /* LATER */ -#endif /* JRM */ + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the read to zero + */ + mpi_off = 0; } /* end if */ /* Write the data. */ @@ -1803,6 +1789,8 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) } /* end if */ else { + if(type != H5FD_MEM_DRAW) + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "Metadata Coll opt property should be collective at this point") #ifdef H5FDmpio_DEBUG if(H5FD_mpio_Debug[(int)'t']) fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n"); diff --git a/src/H5Fint.c b/src/H5Fint.c index 14f5456..ea6c9c1 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -172,6 +172,10 @@ H5F_get_access_plist(H5F_t *f, hbool_t app_ref) efc_size = H5F_efc_max_nfiles(f->shared->efc); if(H5P_set(new_plist, H5F_ACS_EFC_SIZE_NAME, &efc_size) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set elink file cache size") +#ifdef H5_HAVE_PARALLEL + if(H5P_set(new_plist, H5_COLL_MD_READ_FLAG_NAME, &(f->coll_md_read)) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set collective metadata read flag") +#endif /* H5_HAVE_PARALLEL */ /* Prepare the driver property */ driver_prop.driver_id = f->shared->lf->driver_id; @@ -637,6 +641,12 @@ H5F_new(H5F_file_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t if(efc_size > 0) if(NULL == (f->shared->efc = H5F_efc_create(efc_size))) HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't create external file cache") +#ifdef H5_HAVE_PARALLEL + if(H5P_get(plist, H5_COLL_MD_READ_FLAG_NAME, &(f->coll_md_read)) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag") + if(H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->coll_md_write)) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag") +#endif /* H5_HAVE_PARALLEL */ /* Get the VFD values to cache */ f->shared->maxaddr = H5FD_get_maxaddr(lf); diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index 9783947..5434aa5 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -77,6 +77,35 @@ #ifdef H5_HAVE_PARALLEL + +/*------------------------------------------------------------------------- + * Function: H5F_get_mpi_handle + * + * Purpose: Retrieves MPI File handle. 
+ * + * Return: Success: The size (positive) + * Failure: Negative + * + *------------------------------------------------------------------------- + */ +herr_t +H5F_get_mpi_handle(const H5F_t *f, MPI_File **f_handle) +{ + herr_t ret_value = SUCCEED; + hid_t fapl = -1; + + FUNC_ENTER_NOAPI(FAIL) + + assert(f && f->shared); + + /* Dispatch to driver */ + if ((ret_value = H5FD_get_vfd_handle(f->shared->lf, fapl, (void **)f_handle)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file handle") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_get_mpi_handle() */ + /*------------------------------------------------------------------------- * Function: H5F_mpi_get_rank diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index c62858c..be324d0 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -311,6 +311,10 @@ struct H5F_t { hbool_t closing; /* File is in the process of being closed */ struct H5F_t *parent; /* Parent file that this file is mounted to */ unsigned nmounts; /* Number of children mounted to this file */ +#ifdef H5_HAVE_PARALLEL + H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */ + hbool_t coll_md_write; /* Do all metadata writes collectively */ +#endif /* H5_HAVE_PARALLEL */ }; diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index dd2877b..7decaed 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -454,6 +454,7 @@ #define H5F_ACS_FILE_IMAGE_INFO_NAME "file_image_info" /* struct containing initial file image and callback info */ #define H5F_ACS_CORE_WRITE_TRACKING_FLAG_NAME "core_write_tracking_flag" /* Whether or not core VFD backing store write tracking is enabled */ #define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_NAME "core_write_tracking_page_size" /* The page size in kiB when core VFD write tracking is enabled */ +#define H5F_ACS_COLL_MD_WRITE_FLAG_NAME "collective_metadata_write" /* property indicating whether metadata writes are done collectively or not */ /* ======================== File Mount properties ====================*/ #define H5F_MNT_SYM_LOCAL_NAME "local" /* Whether absolute symlinks local to file. */ @@ -676,6 +677,7 @@ H5_DLL herr_t H5F_super_dirty(H5F_t *f); /* Parallel I/O (i.e. 
MPI) related routines */ #ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5F_get_mpi_handle(const H5F_t *f, MPI_File **f_handle); H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); H5_DLL int H5F_mpi_get_size(const H5F_t *f); diff --git a/src/H5Pdxpl.c b/src/H5Pdxpl.c index 89dcd16..535e34b 100644 --- a/src/H5Pdxpl.c +++ b/src/H5Pdxpl.c @@ -181,7 +181,13 @@ #define H5FD_DXPL_TYPE_SIZE sizeof(H5FD_dxpl_type_t) #define H5FD_DXPL_TYPE_DEF H5FD_NOIO_DXPL #endif /* H5_DEBUG_BUILD */ - +#ifdef H5_HAVE_PARALLEL +/* Definition for reading metadata collectively */ +#define H5D_XFER_COLL_MD_READ_SIZE sizeof(H5P_coll_md_read_flag_t) +#define H5D_XFER_COLL_MD_READ_DEF H5P_USER_FALSE +#define H5D_XFER_COLL_MD_READ_ENC H5P__encode_coll_md_read_flag_t +#define H5D_XFER_COLL_MD_READ_DEC H5P__decode_coll_md_read_flag_t +#endif H5_HAVE_PARALLEL /******************/ /* Local Typedefs */ /******************/ @@ -278,6 +284,7 @@ static const H5D_mpio_no_collective_cause_t H5D_def_mpio_no_collective_cause_g = #ifdef H5_HAVE_PARALLEL static const MPI_Datatype H5D_def_btype_g = H5FD_MPI_XFER_MEM_MPI_TYPE_DEF; /* Default value for MPI buffer type */ static const MPI_Datatype H5D_def_ftype_g = H5FD_MPI_XFER_FILE_MPI_TYPE_DEF; /* Default value for MPI file type */ +static const H5P_coll_md_read_flag_t H5D_def_coll_md_read_g = H5D_XFER_COLL_MD_READ_DEF; /* Default setting for the collective metedata read flag */ #endif /* H5_HAVE_PARALLEL */ static const H5Z_EDC_t H5D_def_enable_edc_g = H5D_XFER_EDC_DEF; /* Default value for EDC property */ static const H5Z_cb_t H5D_def_filter_cb_g = H5D_XFER_FILTER_CB_DEF; /* Default value for filter callback */ @@ -435,6 +442,13 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass) if(H5P_register_real(pclass, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, H5FD_MPI_XFER_FILE_MPI_TYPE_SIZE, &H5D_def_ftype_g, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + + /* Register the metadata collective read flag */ + if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5D_XFER_COLL_MD_READ_SIZE, + &H5D_def_coll_md_read_g, + NULL, NULL, NULL, H5D_XFER_COLL_MD_READ_ENC, H5D_XFER_COLL_MD_READ_DEC, + NULL, NULL, NULL, NULL) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") #endif /* H5_HAVE_PARALLEL */ /* Register the EDC property */ @@ -485,11 +499,12 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") - /* Register the ring property */ + /* Register the ring property (private) */ if(H5P_register_real(pclass, H5AC_RING_NAME, H5AC_XFER_RING_SIZE, &H5D_ring_g, NULL, NULL, NULL, H5AC_XFER_RING_ENC, H5AC_XFER_RING_DEC, NULL, NULL, NULL, NULL) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + #ifdef H5_DEBUG_BUILD /* Register the dxpl IO type property */ if(H5P_register_real(pclass, H5FD_DXPL_TYPE_NAME, H5FD_DXPL_TYPE_SIZE, &H5D_dxpl_type_g, diff --git a/src/H5Pfapl.c b/src/H5Pfapl.c index b92d3e9..75f9e7c 100644 --- a/src/H5Pfapl.c +++ b/src/H5Pfapl.c @@ -178,6 +178,17 @@ #define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEF 524288 #define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_ENC H5P__encode_size_t #define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEC H5P__decode_size_t +/* Definition of collective metadata read mode flag */ +#define H5F_ACS_COLL_MD_READ_FLAG_SIZE 
sizeof(H5P_coll_md_read_flag_t) +#define H5F_ACS_COLL_MD_READ_FLAG_DEF H5P_USER_FALSE +#define H5F_ACS_COLL_MD_READ_FLAG_ENC H5P__encode_coll_md_read_flag_t +#define H5F_ACS_COLL_MD_READ_FLAG_DEC H5P__decode_coll_md_read_flag_t +/* Definition of collective metadata write mode flag */ +#define H5F_ACS_COLL_MD_WRITE_FLAG_SIZE sizeof(hbool_t) +#define H5F_ACS_COLL_MD_WRITE_FLAG_DEF FALSE +#define H5F_ACS_COLL_MD_WRITE_FLAG_ENC H5P__encode_hbool_t +#define H5F_ACS_COLL_MD_WRITE_FLAG_DEC H5P__decode_hbool_t + /******************/ /* Local Typedefs */ @@ -280,6 +291,8 @@ static const unsigned H5F_def_efc_size_g = H5F_ACS_EFC_SIZE_DEF; static const H5FD_file_image_info_t H5F_def_file_image_info_g = H5F_ACS_FILE_IMAGE_INFO_DEF; /* Default file image info and callbacks */ static const hbool_t H5F_def_core_write_tracking_flag_g = H5F_ACS_CORE_WRITE_TRACKING_FLAG_DEF; /* Default setting for core VFD write tracking */ static const size_t H5F_def_core_write_tracking_page_size_g = H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEF; /* Default core VFD write tracking page size */ +static const H5P_coll_md_read_flag_t H5F_def_coll_md_read_flag_g = H5F_ACS_COLL_MD_READ_FLAG_DEF; /* Default setting for the collective metedata read flag */ +static const hbool_t H5F_def_coll_md_write_flag_g = H5F_ACS_COLL_MD_WRITE_FLAG_DEF; /* Default setting for the collective metedata write flag */ @@ -437,6 +450,18 @@ H5P__facc_reg_prop(H5P_genclass_t *pclass) NULL, NULL, NULL, NULL) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + /* Register the metadata collective read flag */ + if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5F_ACS_COLL_MD_READ_FLAG_SIZE, &H5F_def_coll_md_read_flag_g, + NULL, NULL, NULL, H5F_ACS_COLL_MD_READ_FLAG_ENC, H5F_ACS_COLL_MD_READ_FLAG_DEC, + NULL, NULL, NULL, NULL) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + + /* Register the metadata collective write flag */ + if(H5P_register_real(pclass, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, H5F_ACS_COLL_MD_WRITE_FLAG_SIZE, &H5F_def_coll_md_write_flag_g, + NULL, NULL, NULL, H5F_ACS_COLL_MD_WRITE_FLAG_ENC, H5F_ACS_COLL_MD_WRITE_FLAG_DEC, + NULL, NULL, NULL, NULL) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5P__facc_reg_prop() */ @@ -3481,3 +3506,270 @@ done: FUNC_LEAVE_API(ret_value) } /* end H5Pget_core_write_tracking() */ + +/*------------------------------------------------------------------------- + * Function: H5P__encode_coll_md_read_flag_t + * + * Purpose: Generic encoding callback routine for 'coll_md_read_flag' properties. 
+ * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5P__encode_coll_md_read_flag_t(const void *value, void **_pp, size_t *size) +{ + const H5P_coll_md_read_flag_t *coll_md_read_flag = (const H5P_coll_md_read_flag_t *)value; + uint8_t **pp = (uint8_t **)_pp; + + FUNC_ENTER_PACKAGE_NOERR + + /* Sanity checks */ + HDassert(coll_md_read_flag); + HDassert(size); + + if(NULL != *pp) { + /* Encode the value */ + HDmemcpy(*pp, coll_md_read_flag, sizeof(H5P_coll_md_read_flag_t)); + *pp += sizeof(H5P_coll_md_read_flag_t); + } /* end if */ + + /* Set size needed for encoding */ + *size += sizeof(H5P_coll_md_read_flag_t); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5P__encode_coll_md_read_flag_t() */ + + +/*------------------------------------------------------------------------- + * Function: H5P__decode_coll_md_read_flag_t + * + * Purpose: Generic decoding callback routine for 'coll_md_read_flag' properties. + * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5P__decode_coll_md_read_flag_t(const void **_pp, void *_value) +{ + H5P_coll_md_read_flag_t *coll_md_read_flag = (H5P_coll_md_read_flag_t *)_value; /* File close degree */ + const uint8_t **pp = (const uint8_t **)_pp; + + FUNC_ENTER_STATIC_NOERR + + /* Sanity checks */ + HDassert(pp); + HDassert(*pp); + HDassert(coll_md_read_flag); + + /* Decode file close degree */ + *coll_md_read_flag = (H5P_coll_md_read_flag_t)*(*pp); + *pp += sizeof(H5P_coll_md_read_flag_t); + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5P__decode_coll_md_read_flag_t() */ + +#ifdef H5_HAVE_PARALLEL + +/*------------------------------------------------------------------------- + * Function: H5Pset_coll_metadata_read + * + * Purpose: Tell the library whether the metadata read operations will + * be done collectively (1) or not (0). Default is independent. + * With collective mode, the library will optimize access to + * metadata operations on the file. + * + * Note: This routine accepts file access property lists, link + * access property lists, attribute access property lists, + * dataset access property lists, group access property lists, + * named datatype access property lists, + * and dataset transfer property lists. 
+ * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pset_coll_metadata_read(hid_t plist_id, hbool_t is_collective) +{ + H5P_genplist_t *plist; /* Property list pointer */ + H5P_coll_md_read_flag_t coll_meta_read; /* Property value */ + herr_t ret_value = SUCCEED; /* return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "ib", plist_id, is_collective); + + /* Compare the property list's class against the other class */ + /* (Dataset, group, attribute, and named datype access property lists + * are sub-classes of link access property lists -QAK) + */ + if(TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) && + TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS) && + TRUE != H5P_isa_class(plist_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist") + + /* set property to either TRUE if > 0, or FALSE otherwise */ + if(is_collective) + coll_meta_read = H5P_USER_TRUE; + else + coll_meta_read = H5P_USER_FALSE; + + /* Get the plist structure */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id))) + HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID") + + /* Set values */ + if(H5P_set(plist, H5_COLL_MD_READ_FLAG_NAME, &coll_meta_read) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata read flag") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5Pset_coll_metadata_read() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pget_coll_metadata_read + * + * Purpose: Gets information about collective metadata read mode. + * + * Note: This routine accepts file access property lists, link + * access property lists, attribute access property lists, + * dataset access property lists, group access property lists, + * named datatype access property lists, + * and dataset transfer property lists. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_coll_metadata_read(hid_t plist_id, hbool_t *is_collective) +{ + herr_t ret_value = SUCCEED; /* return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*b", plist_id, is_collective); + + /* Compare the property list's class against the other class */ + /* (Dataset, group, attribute, and named datype access property lists + * are sub-classes of link access property lists -QAK) + */ + if(TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) && + TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS) && + TRUE != H5P_isa_class(plist_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist") + + /* Get value */ + if(is_collective) { + H5P_coll_md_read_flag_t internal_flag; /* property setting. 
we need to convert to either TRUE or FALSE */ + H5P_genplist_t *plist; /* Property list pointer */ + + /* Get the plist structure */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id))) + HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID") + + if(H5P_get(plist, H5_COLL_MD_READ_FLAG_NAME, &internal_flag) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get core collective metadata read flag") + + if(internal_flag < 0) + *is_collective = FALSE; + else + *is_collective = (hbool_t)internal_flag; + } /* end if */ + +done: + FUNC_LEAVE_API(ret_value) +} /* H5Pget_coll_metadata_read */ + + +/*------------------------------------------------------------------------- + * Function: H5Pset_coll_metadata_write + * + * Purpose: Tell the library whether the metadata write operations will + * be done collectively (1) or not (0). Default is collective. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pset_coll_metadata_write(hid_t plist_id, hbool_t is_collective) +{ + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value = SUCCEED; /* return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "ib", plist_id, is_collective); + + /* Compare the property list's class against the other class */ + if(TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS)) + HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not a file access plist") + + /* Get the plist structure */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id))) + HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID") + + /* Set values */ + if(H5P_set(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &is_collective) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata write flag") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5Pset_coll_metadata_write() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pget_coll_metadata_write + * + * Purpose: Gets information about collective metadata write mode. 
+ * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Mohamad Chaarawi + * Sunday, June 21, 2015 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_coll_metadata_write(hid_t plist_id, hbool_t *is_collective) +{ + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value = SUCCEED; /* return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*b", plist_id, is_collective); + + /* Compare the property list's class against the other class */ + if(TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS)) + HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist") + + /* Get the plist structure */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id))) + HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID") + + if(H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, is_collective) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get collective metadata write flag") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5Pget_coll_metadata_write() */ +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5Pint.c b/src/H5Pint.c index b3d1976..12582bb 100644 --- a/src/H5Pint.c +++ b/src/H5Pint.c @@ -29,6 +29,7 @@ /* Headers */ /***********/ #include "H5private.h" /* Generic Functions */ +#include "H5ACprivate.h" /* Metadata cache */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* File access */ #include "H5FLprivate.h" /* Free lists */ @@ -5450,8 +5451,8 @@ H5P_get_class(const H5P_genplist_t *plist) *------------------------------------------------------------------------- */ herr_t -H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass, - hid_t *dxpl_id, hid_t loc_id, hbool_t is_collective) +H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass, hid_t *dxpl_id, + hid_t loc_id, hbool_t is_collective) { herr_t ret_value = SUCCEED; /* Return value */ @@ -5466,9 +5467,30 @@ H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass, if(H5P_DEFAULT == *acspl_id) *acspl_id = *libclass->def_plist_id; else { +#ifdef H5_HAVE_PARALLEL + H5P_coll_md_read_flag_t is_collective; /* Collective metadata read flag */ + H5P_genplist_t *plist; /* Property list pointer */ +#endif /* H5_HAVE_PARALLEL */ + /* Sanity check the access property list class */ if(TRUE != H5P_isa_class(*acspl_id, *libclass->class_id)) HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not the required access property list") + + *dxpl_id = H5AC_ind_read_dxpl_id; + +#ifdef H5_HAVE_PARALLEL + /* Get the plist structure for the access property list */ + if(NULL == (plist = (H5P_genplist_t *)H5I_object(*acspl_id))) + HGOTO_ERROR(H5E_PLIST, H5E_BADATOM, FAIL, "can't find object for ID") + + /* Get the collective metadata read flag */ + if(H5P_peek(plist, H5_COLL_MD_READ_FLAG_NAME, &is_collective) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get core collective metadata read flag") + + /* If collective metadata read requested and using internal DXPL, switch to internal collective DXPL */ + if(H5P_USER_TRUE == is_collective) + *dxpl_id = H5AC_coll_read_dxpl_id; +#endif /* H5_HAVE_PARALLEL */ } /* end else */ #ifdef H5_HAVE_PARALLEL @@ -5493,3 +5515,4 @@ H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass, done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5P_verify_apl_and_dxpl() */ + diff --git a/src/H5Plapl.c b/src/H5Plapl.c index 239f2a5..cba2f88 100644 --- a/src/H5Plapl.c +++ b/src/H5Plapl.c @@ -88,6 +88,11 @@ #define 
H5L_ACS_ELINK_CB_SIZE sizeof(H5L_elink_cb_t) #define H5L_ACS_ELINK_CB_DEF {NULL,NULL} +/* Definition for reading metadata collectively */ +#define H5L_ACS_COLL_MD_READ_SIZE sizeof(H5P_coll_md_read_flag_t) +#define H5L_ACS_COLL_MD_READ_DEF H5P_USER_FALSE +#define H5L_ACS_COLL_MD_READ_ENC H5P__encode_coll_md_read_flag_t +#define H5L_ACS_COLL_MD_READ_DEC H5P__decode_coll_md_read_flag_t /******************/ /* Local Typedefs */ @@ -164,7 +169,7 @@ static const char *H5L_def_elink_prefix_g = H5L_ACS_ELINK_PREFIX_DEF; /* Default static const hid_t H5L_def_fapl_id_g = H5L_ACS_ELINK_FAPL_DEF; /* Default fapl for external link access */ static const unsigned H5L_def_elink_flags_g = H5L_ACS_ELINK_FLAGS_DEF; /* Default file access flags for external link traversal */ static const H5L_elink_cb_t H5L_def_elink_cb_g = H5L_ACS_ELINK_CB_DEF; /* Default external link traversal callback */ - +static const H5P_coll_md_read_flag_t H5L_def_coll_md_read_g = H5L_ACS_COLL_MD_READ_DEF; /* Default setting for the collective metedata read flag */ /*------------------------------------------------------------------------- @@ -216,6 +221,13 @@ H5P__lacc_reg_prop(H5P_genclass_t *pclass) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + /* Register the metadata collective read flag */ + if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5L_ACS_COLL_MD_READ_SIZE, + &H5L_def_coll_md_read_g, + NULL, NULL, NULL, H5L_ACS_COLL_MD_READ_ENC, H5L_ACS_COLL_MD_READ_DEC, + NULL, NULL, NULL, NULL) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class") + done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5P__lacc_reg_prop() */ diff --git a/src/H5Ppkg.h b/src/H5Ppkg.h index 5997845..3662cf9 100644 --- a/src/H5Ppkg.h +++ b/src/H5Ppkg.h @@ -192,6 +192,8 @@ H5_DLL herr_t H5P__decode_unsigned(const void **_pp, void *value); H5_DLL herr_t H5P__decode_uint8_t(const void **_pp, void *value); H5_DLL herr_t H5P__decode_hbool_t(const void **_pp, void *value); H5_DLL herr_t H5P__decode_double(const void **_pp, void *value); +H5_DLL herr_t H5P__encode_coll_md_read_flag_t(const void *value, void **_pp, size_t *size); +H5_DLL herr_t H5P__decode_coll_md_read_flag_t(const void **_pp, void *value); /* Private OCPL routines */ H5_DLL herr_t H5P_get_filter(const struct H5Z_filter_info_t *filter, diff --git a/src/H5Pprivate.h b/src/H5Pprivate.h index 9be6e2a..be2bceb 100644 --- a/src/H5Pprivate.h +++ b/src/H5Pprivate.h @@ -46,6 +46,14 @@ /* Library Private Typedefs */ /****************************/ +#define H5_COLL_MD_READ_FLAG_NAME "collective_metadata_read" + +typedef enum H5P_coll_md_read_flag_t { + H5P_FORCE_FALSE = -1, + H5P_USER_FALSE = 0, + H5P_USER_TRUE = 1 +} H5P_coll_md_read_flag_t; + /* Forward declarations (for prototypes & type definitions) */ struct H5O_fill_t; struct H5T_t; diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h index e102776..1820625 100644 --- a/src/H5Ppublic.h +++ b/src/H5Ppublic.h @@ -351,6 +351,12 @@ H5_DLL herr_t H5Pget_file_image_callbacks(hid_t fapl_id, H5FD_file_image_callbacks_t *callbacks_ptr); H5_DLL herr_t H5Pset_core_write_tracking(hid_t fapl_id, hbool_t is_enabled, size_t page_size); H5_DLL herr_t H5Pget_core_write_tracking(hid_t fapl_id, hbool_t *is_enabled, size_t *page_size); +#ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5Pset_coll_metadata_read(hid_t plist_id, hbool_t is_collective); +H5_DLL herr_t H5Pget_coll_metadata_read(hid_t plist_id, hbool_t *is_collective); +H5_DLL herr_t 
H5Pset_coll_metadata_write(hid_t plist_id, hbool_t is_collective); +H5_DLL herr_t H5Pget_coll_metadata_write(hid_t plist_id, hbool_t *is_collective); +#endif /* H5_HAVE_PARALLEL */ /* Dataset creation property list (DCPL) routines */ H5_DLL herr_t H5Pset_layout(hid_t plist_id, H5D_layout_t layout);
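
A minimal usage sketch for the properties introduced above; it is not taken from the patch itself. It drives the new H5Pset/get_coll_metadata_read and H5Pset/get_coll_metadata_write calls declared in H5Ppublic.h on an MPI-IO file access property list. The file name, the communicator choice, and the printf reporting are illustrative assumptions, and error checking is omitted for brevity. Per the routine comments above, the read flag may also be set on link/object access and dataset transfer property lists, while the write flag is accepted only on file access property lists.

/* Sketch: enable collective metadata reads and writes on a parallel file.
 * Not part of the patch; file name and communicator are placeholders. */
#include <stdio.h>
#include <mpi.h>
#include "hdf5.h"

int
main(int argc, char **argv)
{
    hid_t   fapl_id, file_id;
    hbool_t coll_read = 0, coll_write = 0;

    MPI_Init(&argc, &argv);

    /* File access property list using the MPI-IO driver */
    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL);

    /* Request collective metadata reads (independent by default) and
     * collective metadata writes when the metadata cache is flushed */
    H5Pset_coll_metadata_read(fapl_id, 1);
    H5Pset_coll_metadata_write(fapl_id, 1);

    /* Read the flags back through the matching get routines */
    H5Pget_coll_metadata_read(fapl_id, &coll_read);
    H5Pget_coll_metadata_write(fapl_id, &coll_write);
    printf("coll md read = %d, coll md write = %d\n",
           (int)coll_read, (int)coll_write);

    /* All ranks must create/open the file with consistent settings */
    file_id = H5Fcreate("coll_md_example.h5", H5F_ACC_TRUNC,
                        H5P_DEFAULT, fapl_id);

    /* ... collective object creation and I/O would go here ... */

    H5Fclose(file_id);
    H5Pclose(fapl_id);
    MPI_Finalize();
    return 0;
}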