summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/H5A.c9
-rw-r--r--src/H5AC.c69
-rw-r--r--src/H5ACmpio.c8
-rw-r--r--src/H5ACprivate.h6
-rw-r--r--src/H5C.c570
-rw-r--r--src/H5Cmpio.c276
-rw-r--r--src/H5Cpkg.h274
-rw-r--r--src/H5Cprivate.h5
-rw-r--r--src/H5D.c2
-rw-r--r--src/H5Dchunk.c23
-rw-r--r--src/H5Ddeprec.c3
-rw-r--r--src/H5F.c1
-rw-r--r--src/H5FDmpio.c78
-rw-r--r--src/H5Fint.c10
-rw-r--r--src/H5Fmpi.c29
-rw-r--r--src/H5Fpkg.h4
-rw-r--r--src/H5Fprivate.h2
-rw-r--r--src/H5Pdxpl.c16
-rw-r--r--src/H5Pfapl.c292
-rw-r--r--src/H5Pint.c30
-rw-r--r--src/H5Plapl.c14
-rw-r--r--src/H5Ppkg.h2
-rw-r--r--src/H5Pprivate.h8
-rw-r--r--src/H5Ppublic.h6
24 files changed, 1624 insertions, 113 deletions
diff --git a/src/H5A.c b/src/H5A.c
index 0a2f983..ccda374 100644
--- a/src/H5A.c
+++ b/src/H5A.c
@@ -336,7 +336,7 @@ H5Acreate_by_name(hid_t loc_id, const char *obj_name, const char *attr_name,
hbool_t loc_found = FALSE; /* Entry at 'obj_name' found */
H5T_t *type; /* Datatype to use for attribute */
H5S_t *space; /* Dataspace to use for attribute */
- hid_t dxpl_id; /* dxpl used by library */
+ hid_t dxpl_id = H5AC_dxpl_id; /* dxpl used by library */
hid_t ret_value; /* Return value */
FUNC_ENTER_API(FAIL)
@@ -365,7 +365,6 @@ H5Acreate_by_name(hid_t loc_id, const char *obj_name, const char *attr_name,
H5G_loc_reset(&obj_loc);
/* Verify access property list and get correct dxpl */
- dxpl_id = H5AC_dxpl_id;
if(H5P_verify_apl_and_dxpl(&aapl_id, H5P_CLS_AACC, &dxpl_id, loc_id, TRUE) < 0)
HGOTO_ERROR(H5E_ATTR, H5E_CANTSET, FAIL, "can't set access and transfer property lists")
@@ -1507,7 +1506,7 @@ H5Adelete_by_name(hid_t loc_id, const char *obj_name, const char *attr_name,
H5G_name_t obj_path; /* Opened object group hier. path */
H5O_loc_t obj_oloc; /* Opened object object location */
hbool_t loc_found = FALSE; /* Entry at 'obj_name' found */
- hid_t dxpl_id; /* dxpl used by library */
+ hid_t dxpl_id = H5AC_dxpl_id; /* dxpl used by library */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_API(FAIL)
@@ -1524,7 +1523,6 @@ H5Adelete_by_name(hid_t loc_id, const char *obj_name, const char *attr_name,
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no attribute name")
/* Verify access property list and get correct dxpl */
- dxpl_id = H5AC_dxpl_id;
if(H5P_verify_apl_and_dxpl(&lapl_id, H5P_CLS_LACC, &dxpl_id, loc_id, TRUE) < 0)
HGOTO_ERROR(H5E_ATTR, H5E_CANTSET, FAIL, "can't set access and transfer property lists")
@@ -1584,7 +1582,7 @@ H5Adelete_by_idx(hid_t loc_id, const char *obj_name, H5_index_t idx_type,
H5G_name_t obj_path; /* Opened object group hier. path */
H5O_loc_t obj_oloc; /* Opened object object location */
hbool_t loc_found = FALSE; /* Entry at 'obj_name' found */
- hid_t dxpl_id; /* dxpl used by library */
+ hid_t dxpl_id = H5AC_dxpl_id; /* dxpl used by library */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_API(FAIL)
@@ -1603,7 +1601,6 @@ H5Adelete_by_idx(hid_t loc_id, const char *obj_name, H5_index_t idx_type,
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid iteration order specified")
/* Verify access property list and get correct dxpl */
- dxpl_id = H5AC_dxpl_id;
if(H5P_verify_apl_and_dxpl(&lapl_id, H5P_CLS_LACC, &dxpl_id, loc_id, TRUE) < 0)
HGOTO_ERROR(H5E_ATTR, H5E_CANTSET, FAIL, "can't set access and transfer property lists")
diff --git a/src/H5AC.c b/src/H5AC.c
index 41fb78d..2ffa1c6 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -80,8 +80,15 @@ hbool_t H5_PKG_INIT_VAR = FALSE;
/* Library Private Variables */
/*****************************/
-/* Default dataset transfer property list for metadata I/O calls */
-hid_t H5AC_dxpl_id = (-1);
+/* Default dataset transfer property list for metadata I/O calls (coll write, ind read) */
+hid_t H5AC_ind_read_dxpl_id = (-1);
+hid_t H5AC_dxpl_id;
+#ifdef H5_HAVE_PARALLEL
+/* collective metadata read property */
+hid_t H5AC_coll_read_dxpl_id = (-1);
+#endif /* H5_HAVE_PARALLEL */
+
+/* global flag for collective API sanity checks */
hbool_t H5_coll_api_sanity_check_g = false;
/*******************/
@@ -163,15 +170,15 @@ done:
herr_t
H5AC__init_package(void)
{
+#ifdef H5_HAVE_PARALLEL
+ H5P_genplist_t *xfer_plist; /* Dataset transfer property list object */
+ H5P_coll_md_read_flag_t coll_meta_read;
+#endif /* H5_HAVE_PARALLEL */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_PACKAGE
#ifdef H5_HAVE_PARALLEL
- /* Get an ID for the metadata (H5AC) dxpl */
- if((H5AC_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list")
-
/* check whether to enable strict collective function calling
sanity checks using MPI barriers */
{
@@ -182,8 +189,29 @@ H5AC__init_package(void)
H5_coll_api_sanity_check_g = (hbool_t)HDstrtol(s, NULL, 0);
}
}
+
+ /* Get an ID for the metadata (H5AC) dxpl */
+ if((H5AC_ind_read_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list")
+
+ /* MSC - Temp. Remove later */
+ H5AC_dxpl_id = H5AC_ind_read_dxpl_id;
+
+ /* Get an ID for H5AC_coll_read_dxpl_id */
+ if((H5AC_coll_read_dxpl_id = H5P_create_id(H5P_CLS_DATASET_XFER_g, FALSE)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list")
+
+ /* Get the property list object */
+ if (NULL == (xfer_plist = (H5P_genplist_t *)H5I_object(H5AC_coll_read_dxpl_id)))
+ HGOTO_ERROR(H5E_CACHE, H5E_BADATOM, FAIL, "can't get new property list object")
+
+ /* set 'collective metadata read' property */
+ coll_meta_read = H5P_USER_TRUE;
+ if(H5P_set(xfer_plist, H5_COLL_MD_READ_FLAG_NAME, &coll_meta_read) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata read flag")
+
#else /* H5_HAVE_PARALLEL */
- H5AC_dxpl_id = H5P_DATASET_XFER_DEFAULT;
+ H5AC_ind_read_dxpl_id = H5P_DATASET_XFER_DEFAULT;
#endif /* H5_HAVE_PARALLEL */
done:
@@ -214,18 +242,22 @@ H5AC_term_package(void)
if(H5_PKG_INIT_VAR) {
#ifdef H5_HAVE_PARALLEL
- if(H5AC_dxpl_id > 0) {
+ if(H5AC_ind_read_dxpl_id > 0 ||
+ H5AC_coll_read_dxpl_id > 0) {
/* Indicate more work to do */
n = 1; /* H5I */
/* Close H5AC dxpl */
- if(H5I_dec_ref(H5AC_dxpl_id) < 0)
+ if(H5I_dec_ref(H5AC_ind_read_dxpl_id) < 0 ||
+ H5I_dec_ref(H5AC_coll_read_dxpl_id) < 0)
H5E_clear_stack(NULL); /*ignore error*/
} /* end if */
#endif /* H5_HAVE_PARALLEL */
-
/* Reset static IDs */
- H5AC_dxpl_id = (-1);
+ H5AC_ind_read_dxpl_id = (-1);
+#ifdef H5_HAVE_PARALLEL
+ H5AC_coll_read_dxpl_id = (-1);
+#endif /* H5_HAVE_PARALLEL */
/* Reset interface initialization flag */
if(0 == n)
@@ -440,7 +472,12 @@ H5AC_dest(H5F_t *f, hid_t dxpl_id)
#endif /* H5AC__TRACE_FILE_ENABLED */
#ifdef H5_HAVE_PARALLEL
+ /* destroying the cache, so clear all collective entries */
+ if(H5C_clear_coll_entries(f->shared->cache, 0) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
+
aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(f->shared->cache);
+
if(aux_ptr)
/* Sanity check */
HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC);
@@ -578,6 +615,10 @@ H5AC_flush(H5F_t *f, hid_t dxpl_id)
#endif /* H5AC__TRACE_FILE_ENABLED */
#ifdef H5_HAVE_PARALLEL
+ /* flushing the cache, so clear all collective entries */
+ if(H5C_clear_coll_entries(f->shared->cache, 0) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
+
/* Attempt to flush all entries from rank 0 & Bcast clean list to other ranks */
if(H5AC__flush_entries(f, dxpl_id) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.")
@@ -735,7 +776,7 @@ H5AC_insert_entry(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t add
/* Check if we should try to flush */
if(aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)
- if(H5AC__run_sync_point(f, H5AC_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ if(H5AC__run_sync_point(f, H5AC_ind_read_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
} /* end if */
}
@@ -874,7 +915,7 @@ H5AC_move_entry(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr, haddr_t ne
#ifdef H5_HAVE_PARALLEL
/* Check if we should try to flush */
if(NULL != aux_ptr && aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold)
- if(H5AC__run_sync_point(f, H5AC_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ if(H5AC__run_sync_point(f, H5AC_ind_read_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
#endif /* H5_HAVE_PARALLEL */
@@ -1356,7 +1397,7 @@ H5AC_unprotect(H5F_t *f, hid_t dxpl_id, const H5AC_class_t *type, haddr_t addr,
#ifdef H5_HAVE_PARALLEL
/* Check if we should try to flush */
if((aux_ptr != NULL) && (aux_ptr->dirty_bytes >= aux_ptr->dirty_bytes_threshold))
- if(H5AC__run_sync_point(f, H5AC_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
+ if(H5AC__run_sync_point(f, H5AC_ind_read_dxpl_id, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't run sync point.")
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c
index ca2582e..3331613 100644
--- a/src/H5ACmpio.c
+++ b/src/H5ACmpio.c
@@ -2065,6 +2065,14 @@ HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu
aux_ptr->rename_dirty_bytes_updates);
#endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */
+ /* clear collective access flag on half of the entries in the
+ cache and mark them as independent in case they need to be
+ evicted later. All ranks are guranteed to mark the same entires
+ since we don't modify the order of the collectively accessed
+ entries except through collective access. */
+ if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
+
switch(aux_ptr->metadata_write_strategy) {
case H5AC_METADATA_WRITE_STRATEGY__PROCESS_0_ONLY:
switch(sync_point_op) {
diff --git a/src/H5ACprivate.h b/src/H5ACprivate.h
index 0ed6110..17d8776 100644
--- a/src/H5ACprivate.h
+++ b/src/H5ACprivate.h
@@ -186,7 +186,7 @@ typedef H5C_t H5AC_t;
#ifdef H5_HAVE_PARALLEL
/* Definitions for "collective metadata write" property */
#define H5AC_COLLECTIVE_META_WRITE_NAME "H5AC_collective_metadata_write"
-#define H5AC_COLLECTIVE_META_WRITE_SIZE sizeof(unsigned)
+#define H5AC_COLLECTIVE_META_WRITE_SIZE sizeof(hbool_t)
#define H5AC_COLLECTIVE_META_WRITE_DEF 0
#endif /* H5_HAVE_PARALLEL */
@@ -198,6 +198,10 @@ typedef H5C_t H5AC_t;
/* Dataset transfer property list for flush calls */
extern hid_t H5AC_dxpl_id;
+extern hid_t H5AC_ind_read_dxpl_id;
+#ifdef H5_HAVE_PARALLEL
+extern hid_t H5AC_coll_read_dxpl_id;
+#endif /* H5_HAVE_PARALLEL */
/* Default cache configuration. */
diff --git a/src/H5C.c b/src/H5C.c
index 641e098..01486f2 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -88,7 +88,6 @@
#include "H5Cpkg.h" /* Cache */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
-#include "H5FDprivate.h" /* File drivers */
#include "H5FLprivate.h" /* Free Lists */
#include "H5Iprivate.h" /* IDs */
#include "H5MFprivate.h" /* File memory management */
@@ -156,6 +155,9 @@ static herr_t H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring,
static void * H5C_load_entry(H5F_t * f,
hid_t dxpl_id,
+#ifdef H5_HAVE_PARALLEL
+ hbool_t coll_access,
+#endif /* H5_HAVE_PARALLEL */
const H5C_class_t * type,
haddr_t addr,
void * udata);
@@ -180,6 +182,9 @@ static herr_t H5C_mark_tagged_entries(H5C_t * cache_ptr,
static herr_t H5C_flush_marked_entries(H5F_t * f,
hid_t dxpl_id);
+static herr_t H5C__generate_image(H5F_t *f, H5C_t * cache_ptr, H5C_cache_entry_t *entry_ptr,
+ hid_t dxpl_id, int64_t *entry_size_change_ptr);
+
#if H5C_DO_TAGGING_SANITY_CHECKS
static herr_t H5C_verify_tag(int id, haddr_t tag);
#endif
@@ -549,6 +554,13 @@ H5C_create(size_t max_cache_size,
cache_ptr->LRU_head_ptr = NULL;
cache_ptr->LRU_tail_ptr = NULL;
+#ifdef H5_HAVE_PARALLEL
+ cache_ptr->coll_list_len = 0;
+ cache_ptr->coll_list_size = (size_t)0;
+ cache_ptr->coll_head_ptr = NULL;
+ cache_ptr->coll_tail_ptr = NULL;
+#endif /* H5_HAVE_PARALLEL */
+
cache_ptr->cLRU_list_len = 0;
cache_ptr->cLRU_list_size = (size_t)0;
cache_ptr->cLRU_head_ptr = NULL;
@@ -970,6 +982,12 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type,
HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is protected.")
if(entry_ptr->is_pinned)
HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is pinned.")
+#ifdef H5_HAVE_PARALLEL
+ if(entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
/* If we get this far, call H5C__flush_single_entry() with the
* H5C__FLUSH_INVALIDATE_FLAG and the H5C__FLUSH_CLEAR_ONLY_FLAG.
@@ -987,8 +1005,8 @@ H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type,
/* Delete the entry from the skip list on destroy */
flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "can't flush entry")
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "H5C_flush_single_entry() failed.")
#if H5C_DO_SANITY_CHECKS
if ( entry_was_dirty )
@@ -1755,6 +1773,9 @@ H5C_insert_entry(H5F_t * f,
H5AC_ring_t ring = H5C_RING_UNDEFINED;
hbool_t insert_pinned;
hbool_t flush_last;
+#ifdef H5_HAVE_PARALLEL
+ hbool_t coll_access = FALSE; /* whether access to the cache entry is done collectively */
+#endif /* H5_HAVE_PARALLEL */
hbool_t set_flush_marker;
hbool_t write_permitted = TRUE;
size_t empty_space;
@@ -1892,6 +1913,9 @@ H5C_insert_entry(H5F_t * f,
entry_ptr->aux_next = NULL;
entry_ptr->aux_prev = NULL;
+ entry_ptr->coll_next = NULL;
+ entry_ptr->coll_prev = NULL;
+
H5C__RESET_CACHE_ENTRY_STATS(entry_ptr)
if ( ( cache_ptr->flash_size_increase_possible ) &&
@@ -1993,6 +2017,45 @@ H5C_insert_entry(H5F_t * f,
H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr)
+#ifdef H5_HAVE_PARALLEL
+ /* Get the dataset transfer property list */
+ if(NULL == (dxpl = (H5P_genplist_t *)H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a property list");
+
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+ coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
+
+ if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) {
+ H5P_coll_md_read_flag_t prop_value;
+
+ /* Get the property value */
+ if(H5P_get(dxpl, H5_COLL_MD_READ_FLAG_NAME, &prop_value) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "Can't get collective metadata access flag")
+ coll_access = (H5P_USER_TRUE == prop_value ? TRUE : FALSE);
+ } /* end if */
+ } /* end if */
+
+ entry_ptr->coll_access = coll_access;
+ if(coll_access) {
+ H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+
+ /* Make sure the size of the collective entries in the cache remain in check */
+ if(H5P_USER_TRUE == f->coll_md_read) {
+ if(cache_ptr->max_cache_size*80 < cache_ptr->coll_list_size*100) {
+ if(H5C_clear_coll_entries(cache_ptr, 1) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
+ } /* end if */
+ } /* end if */
+ else {
+ if(cache_ptr->max_cache_size*40 < cache_ptr->coll_list_size*100) {
+ if(H5C_clear_coll_entries(cache_ptr, 1) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "H5C_clear_coll_entries() failed.")
+ } /* end if */
+ } /* end else */
+ } /* end if */
+ entry_ptr->ind_access_while_coll = FALSE;
+#endif
+
done:
#if H5C_DO_EXTREME_SANITY_CHECKS
if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) ||
@@ -2349,6 +2412,14 @@ H5C_resize_entry(void *thing, size_t new_size)
(entry_ptr->size), (new_size))
} /* end if */
+#ifdef H5_HAVE_PARALLEL
+ if(entry_ptr->coll_access) {
+ H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr->coll_list_len), \
+ (cache_ptr->coll_list_size), \
+ (entry_ptr->size), (new_size))
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
/* update the hash table */
H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), (entry_ptr->size),\
(new_size), (entry_ptr), (was_clean));
@@ -2575,6 +2646,9 @@ H5C_protect(H5F_t * f,
hbool_t have_write_permitted = FALSE;
hbool_t read_only = FALSE;
hbool_t flush_last;
+#ifdef H5_HAVE_PARALLEL
+ hbool_t coll_access = FALSE; /* whether access to the cache entry is done collectively */
+#endif /* H5_HAVE_PARALLEL */
hbool_t write_permitted;
size_t empty_space;
void * thing;
@@ -2616,6 +2690,21 @@ H5C_protect(H5F_t * f,
if((H5P_get(dxpl, H5AC_RING_NAME, &ring)) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "unable to query ring value")
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+ coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
+
+ if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read) {
+ H5P_coll_md_read_flag_t prop_value;
+
+ /* get the property value */
+ if(H5P_get(dxpl, H5_COLL_MD_READ_FLAG_NAME, &prop_value) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "Can't get collective metadata access flag")
+ coll_access = (H5P_USER_TRUE == prop_value ? TRUE : FALSE);
+ } /* end if */
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
/* first check to see if the target is in cache */
H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL)
@@ -2627,6 +2716,62 @@ H5C_protect(H5F_t * f,
if(entry_ptr->type != type)
HGOTO_ERROR(H5E_CACHE, H5E_BADTYPE, NULL, "incorrect cache entry type")
+ /* if this is a collective metadata read, the entry is not
+ marked as collective, and is clean, it is possible that
+ other processes will not have it in its cache and will
+ expect a bcast of the entry from process 0. So process 0
+ will bcast the entry to all other ranks. Ranks that do have
+ the entry in their cache still have to participate in the
+ bcast. */
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access &&
+ !(entry_ptr->is_dirty) && !(entry_ptr->coll_access)) {
+ MPI_Comm comm; /* File MPI Communicator */
+ int mpi_code; /* MPI error code */
+ int buf_size;
+
+ if(MPI_COMM_NULL == (comm = H5F_mpi_get_comm(f)))
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "get_comm request failed")
+
+ if(entry_ptr->image_ptr == NULL) {
+ int mpi_rank;
+ size_t image_size;
+
+ if((mpi_rank = H5F_mpi_get_rank(f)) < 0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "Can't get MPI rank")
+
+ if(entry_ptr->compressed)
+ image_size = entry_ptr->compressed_size;
+ else
+ image_size = entry_ptr->size;
+ HDassert(image_size > 0);
+
+ if(NULL == (entry_ptr->image_ptr = H5MM_malloc(image_size + H5C_IMAGE_EXTRA_SPACE)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "memory allocation failed for on disk image buffer")
+#if H5C_DO_MEMORY_SANITY_CHECKS
+ HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + image_size,
+ H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
+#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
+ if(0 == mpi_rank)
+ if(H5C__generate_image(f, cache_ptr, entry_ptr, dxpl_id, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "Can't get Image")
+ } /* end if */
+
+ HDassert(entry_ptr->image_ptr);
+
+ H5_CHECKED_ASSIGN(buf_size, int, entry_ptr->size, size_t);
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(entry_ptr->image_ptr, buf_size, MPI_BYTE, 0, comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+
+ entry_ptr->coll_access = TRUE;
+
+ H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, NULL)
+ } /* end if */
+ else if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access && entry_ptr->coll_access) {
+ H5C__MOVE_TO_TOP_IN_COLL_LIST(cache_ptr, entry_ptr, NULL)
+ } /* end else-if */
+#endif /* H5_HAVE_PARALLEL */
+
#if H5C_DO_TAGGING_SANITY_CHECKS
{
haddr_t tag = HADDR_UNDEF;
@@ -2659,7 +2804,11 @@ H5C_protect(H5F_t * f,
hit = FALSE;
- thing = H5C_load_entry(f, dxpl_id, type, addr, udata);
+ thing = H5C_load_entry(f, dxpl_id,
+#ifdef H5_HAVE_PARALLEL
+ coll_access,
+#endif /* H5_HAVE_PARALLEL */
+ type, addr, udata);
if ( thing == NULL ) {
@@ -2668,6 +2817,10 @@ H5C_protect(H5F_t * f,
entry_ptr = (H5C_cache_entry_t *)thing;
entry_ptr->ring = ring;
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && entry_ptr->coll_access)
+ H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, NULL)
+#endif /* H5_HAVE_PARALLEL */
/* Apply tag to newly protected entry */
if(H5C_tag_entry(cache_ptr, entry_ptr, dxpl_id) < 0)
@@ -2901,6 +3054,24 @@ H5C_protect(H5F_t * f,
}
}
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+ /* Make sure the size of the collective entries in the cache remain in check */
+ if(TRUE == coll_access) {
+ if(H5P_USER_TRUE == f->coll_md_read) {
+ if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
+ if(H5C_clear_coll_entries(cache_ptr, 1) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "H5C_clear_coll_entries() failed.")
+ } /* end if */
+ else {
+ if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100)
+ if(H5C_clear_coll_entries(cache_ptr, 1) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "H5C_clear_coll_entries() failed.")
+ } /* end else */
+ } /* end if */
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
done:
#if H5C_DO_EXTREME_SANITY_CHECKS
@@ -4544,7 +4715,7 @@ H5C_unprotect(H5F_t * f,
/* Delete the entry from the skip list on destroy */
flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush entry")
#if H5C_DO_SANITY_CHECKS
@@ -4570,7 +4741,7 @@ H5C_unprotect(H5F_t * f,
else if(test_entry_ptr != entry_ptr)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "hash table contains multiple entries for addr?!?.")
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear entry")
}
#endif /* H5_HAVE_PARALLEL */
@@ -5863,7 +6034,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
cache_ptr->entries_removed_counter = 0;
cache_ptr->last_entry_removed_ptr = NULL;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
if ( ( cache_ptr->entries_removed_counter > 1 ) ||
@@ -5875,7 +6046,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
bytes_evicted += entry_ptr->size;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0 )
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0 )
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
}
@@ -5955,7 +6126,7 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f,
prev_ptr = entry_ptr->prev;
if ( ! (entry_ptr->is_dirty) ) {
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush clean entry")
}
/* just skip the entry if it is dirty, as we can't do
@@ -6803,7 +6974,7 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring,
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, entry_size_change_ptr) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, entry_size_change_ptr, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.")
#if H5C_DO_SANITY_CHECKS
/* entry size may have changed during the flush.
@@ -6848,9 +7019,9 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring,
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr,
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr,
(cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG),
- entry_size_change_ptr) < 0)
+ entry_size_change_ptr, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty entry flush destroy failed.")
#if H5C_DO_SANITY_CHECKS
/* entry size may have changed during the flush.
@@ -6971,8 +7142,8 @@ H5C_flush_invalidate_ring(const H5F_t * f, hid_t dxpl_id, H5C_ring_t ring,
entry_was_dirty = entry_ptr->is_dirty;
if(H5C__flush_single_entry(f, dxpl_id, entry_ptr,
- (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG),
- NULL) < 0)
+ (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG),
+ NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Entry flush destroy failed.")
if(entry_was_dirty) {
@@ -7342,7 +7513,7 @@ H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring, unsigned flags)
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.")
#if H5C_DO_SANITY_CHECKS
@@ -7387,7 +7558,7 @@ H5C_flush_ring(H5F_t *f, hid_t dxpl_id, H5C_ring_t ring, unsigned flags)
flushed_entries_size += (int64_t)entry_ptr->size;
entry_size_change = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush entry.")
#if H5C_DO_SANITY_CHECKS
@@ -7536,7 +7707,7 @@ done:
*/
herr_t
H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_ptr,
- unsigned flags, int64_t *entry_size_change_ptr)
+ unsigned flags, int64_t *entry_size_change_ptr, H5SL_t *collective_write_list)
{
H5C_t * cache_ptr; /* Cache for file */
hbool_t destroy; /* external flag */
@@ -7581,6 +7752,10 @@ H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_
else
destroy_entry = destroy;
+#ifdef H5_HAVE_PARALLEL
+ HDassert(FALSE == entry_ptr->coll_access);
+#endif
+
/* we will write the entry to disk if it exists, is dirty, and if the
* clear only flag is not set.
*/
@@ -7913,6 +8088,25 @@ H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_
else
image_size = entry_ptr->size;
+#ifdef H5_HAVE_PARALLEL
+ if(collective_write_list) {
+ H5C_collective_write_t *item = NULL;
+
+ if(NULL == (item = (H5C_collective_write_t *)H5MM_malloc(sizeof(H5C_collective_write_t))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate skip list item")
+
+ item->length = image_size;
+ item->free_buf = FALSE;
+ item->buf = entry_ptr->image_ptr;
+ item->offset = entry_ptr->addr;
+
+ if(H5SL_insert(collective_write_list, item, &item->offset) < 0) {
+ H5MM_free(item);
+ HGOTO_ERROR(H5E_HEAP, H5E_CANTINSERT, FAIL, "unable to insert skip list item")
+ } /* end if */
+ } /* end if */
+ else
+#endif /* H5_HAVE_PARALLEL */
if(H5F_block_write(f, entry_ptr->type->mem_type, entry_ptr->addr,
image_size, dxpl_id, entry_ptr->image_ptr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't write image to file.")
@@ -8165,6 +8359,9 @@ done:
static void *
H5C_load_entry(H5F_t * f,
hid_t dxpl_id,
+#ifdef H5_HAVE_PARALLEL
+ hbool_t coll_access,
+#endif /* H5_HAVE_PARALLEL */
const H5C_class_t * type,
haddr_t addr,
void * udata)
@@ -8183,6 +8380,11 @@ H5C_load_entry(H5F_t * f,
H5C_cache_entry_t * entry; /* Alias for thing loaded, as cache entry */
size_t len; /* Size of image in file */
unsigned u; /* Local index variable */
+#ifdef H5_HAVE_PARALLEL
+ int mpi_rank = 0; /* MPI process rank */
+ MPI_Comm comm = MPI_COMM_NULL; /* File MPI Communicator */
+ int mpi_code; /* MPI error code */
+#endif /* H5_HAVE_PARALLEL */
void * ret_value = NULL; /* Return value */
FUNC_ENTER_NOAPI_NOINIT
@@ -8327,10 +8529,37 @@ H5C_load_entry(H5F_t * f,
HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+ if((mpi_rank = H5F_mpi_get_rank(f)) < 0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "Can't get MPI rank")
+ if((comm = H5F_mpi_get_comm(f)) == MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "get_comm request failed")
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
/* Get the on-disk entry image */
- if(0 == (type->flags & H5C__CLASS_SKIP_READS))
- if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*")
+ if(0 == (type->flags & H5C__CLASS_SKIP_READS)) {
+#ifdef H5_HAVE_PARALLEL
+ if(!coll_access || 0 == mpi_rank) {
+#endif /* H5_HAVE_PARALLEL */
+ if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*")
+
+#ifdef H5_HAVE_PARALLEL
+ } /* end if */
+ /* if the collective metadata read optimization is turned on,
+ bcast the metadata read from process 0 to all ranks in the file
+ communicator */
+ if(coll_access) {
+ int buf_size;
+
+ H5_CHECKED_ASSIGN(buf_size, int, len, size_t);
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+ } /* end if */
/* Deserialize the on-disk image into the native memory form */
if(NULL == (thing = type->deserialize(image, len, udata, &dirty)))
@@ -8433,11 +8662,28 @@ H5C_load_entry(H5F_t * f,
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "free_icr callback failed")
+#ifdef H5_HAVE_PARALLEL
+ if(!coll_access || 0 == mpi_rank) {
+#endif /* H5_HAVE_PARALLEL */
/* Go get the on-disk image again */
if(H5F_block_read(f, type->mem_type, addr,
new_len, dxpl_id, image) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't read image")
+#ifdef H5_HAVE_PARALLEL
+ }
+ /* if the collective metadata read optimization is turned on,
+ bcast the metadata read from process 0 to all ranks in the file
+ communicator */
+ if(coll_access) {
+ int buf_size;
+
+ H5_CHECKED_ASSIGN(buf_size, int, new_len, size_t);
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
/* Deserialize on-disk image into native memory form again */
if(NULL == (thing = type->deserialize(image, new_len,
udata, &dirty)))
@@ -8526,6 +8772,8 @@ H5C_load_entry(H5F_t * f,
#ifdef H5_HAVE_PARALLEL
entry->clear_on_unprotect = FALSE;
entry->flush_immediately = FALSE;
+ entry->coll_access = coll_access;
+ entry->ind_access_while_coll = FALSE;
#endif /* H5_HAVE_PARALLEL */
entry->flush_in_progress = FALSE;
entry->destroy_in_progress = FALSE;
@@ -8546,6 +8794,9 @@ H5C_load_entry(H5F_t * f,
entry->aux_next = NULL;
entry->aux_prev = NULL;
+ entry->coll_next = NULL;
+ entry->coll_prev = NULL;
+
H5C__RESET_CACHE_ENTRY_STATS(entry);
ret_value = thing;
@@ -8738,7 +8989,14 @@ H5C_make_space_in_cache(H5F_t * f,
cache_ptr->entries_removed_counter = 0;
cache_ptr->last_entry_removed_ptr = NULL;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0)
+#ifdef H5_HAVE_PARALLEL
+ if(TRUE == entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ } /* end if */
+#endif
+
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
if ( ( cache_ptr->entries_removed_counter > 1 ) ||
@@ -8746,14 +9004,23 @@ H5C_make_space_in_cache(H5F_t * f,
restart_scan = TRUE;
- } else if ( (cache_ptr->index_size + space_needed)
- >
- cache_ptr->max_cache_size ) {
+ } else if ( (cache_ptr->index_size + space_needed) > cache_ptr->max_cache_size
+#ifdef H5_HAVE_PARALLEL
+ && !(entry_ptr->coll_access)
+#endif /* H5_HAVE_PARALLEL */
+ ) {
#if H5C_COLLECT_CACHE_STATS
cache_ptr->entries_scanned_to_make_space++;
#endif /* H5C_COLLECT_CACHE_STATS */
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+#ifdef H5_HAVE_PARALLEL
+ if(TRUE == entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ } /* end if */
+#endif
+
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
} else {
/* We have enough space so don't flush clean entry. */
@@ -8889,8 +9156,14 @@ H5C_make_space_in_cache(H5F_t * f,
prev_ptr = entry_ptr->aux_prev;
- if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
+#ifdef H5_HAVE_PARALLEL
+ if(!(entry_ptr->coll_access)) {
+#endif /* H5_HAVE_PARALLEL */
+ if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry")
+#ifdef H5_HAVE_PARALLEL
+ } /* end if */
+#endif /* H5_HAVE_PARALLEL */
/* we are scanning the clean LRU, so the serialize function
* will not be called on any entry -- thus there is no
@@ -9929,3 +10202,246 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_get_entry_ring() */
+static herr_t
+H5C__generate_image(H5F_t *f, H5C_t * cache_ptr, H5C_cache_entry_t *entry_ptr,
+ hid_t dxpl_id, int64_t *entry_size_change_ptr)
+{
+ haddr_t new_addr = HADDR_UNDEF;
+ haddr_t old_addr = HADDR_UNDEF;
+ size_t new_len = 0;
+ size_t new_compressed_len = 0;
+ unsigned serialize_flags = H5C__SERIALIZE_NO_FLAGS_SET;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+ HDassert(!entry_ptr->image_up_to_date);
+
+ /* reset cache_ptr->slist_changed so we can detect slist
+ * modifications in the pre_serialize call.
+ */
+ cache_ptr->slist_changed = FALSE;
+
+ /* make note of the entry's current address */
+ old_addr = entry_ptr->addr;
+
+ /* Call client's pre-serialize callback, if there's one */
+ if ( ( entry_ptr->type->pre_serialize != NULL ) &&
+ ( (entry_ptr->type->pre_serialize)(f, dxpl_id,
+ (void *)entry_ptr,
+ entry_ptr->addr,
+ entry_ptr->size,
+ entry_ptr->compressed_size,
+ &new_addr, &new_len,
+ &new_compressed_len,
+ &serialize_flags) < 0 ) ) {
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+ "unable to pre-serialize entry");
+ }
+
+ /* set cache_ptr->slist_change_in_pre_serialize if the
+ * slist was modified.
+ */
+ if ( cache_ptr->slist_changed )
+ cache_ptr->slist_change_in_pre_serialize = TRUE;
+
+ /* Check for any flags set in the pre-serialize callback */
+ if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) {
+ /* Check for unexpected flags from serialize callback */
+ if ( serialize_flags & ~(H5C__SERIALIZE_RESIZED_FLAG |
+ H5C__SERIALIZE_MOVED_FLAG |
+ H5C__SERIALIZE_COMPRESSED_FLAG)) {
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+ "unknown serialize flag(s)");
+ }
+
+#ifdef H5_HAVE_PARALLEL
+ if ( cache_ptr->aux_ptr != NULL )
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "resize/move in serialize occured in parallel case.");
+#endif
+
+ /* Resize the buffer if required */
+ if ( ( ( ! entry_ptr->compressed ) &&
+ ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) ) ||
+ ( ( entry_ptr->compressed ) &&
+ ( serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG ) ) ) {
+ size_t new_image_size;
+
+ if ( entry_ptr->compressed )
+ new_image_size = new_compressed_len;
+ else
+ new_image_size = new_len;
+
+ HDassert(new_image_size > 0);
+
+ /* Release the current image */
+ if ( entry_ptr->image_ptr ) {
+ entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr);
+ }
+
+ /* Allocate a new image buffer */
+ entry_ptr->image_ptr =
+ H5MM_malloc(new_image_size + H5C_IMAGE_EXTRA_SPACE);
+
+ if ( NULL == entry_ptr->image_ptr )
+ {
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, \
+ "memory allocation failed for on disk image buffer");
+ }
+
+#if H5C_DO_MEMORY_SANITY_CHECKS
+
+ HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + new_image_size,
+ H5C_IMAGE_SANITY_VALUE,
+ H5C_IMAGE_EXTRA_SPACE);
+
+#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
+
+ } /* end if */
+
+ /* If required, update the entry and the cache data structures
+ * for a resize.
+ */
+ if ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) {
+
+ H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, \
+ entry_ptr, new_len);
+
+ /* update the hash table for the size change*/
+ H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, \
+ entry_ptr->size, \
+ new_len, entry_ptr, \
+ !(entry_ptr->is_dirty));
+
+ /* The entry can't be protected since we are
+ * in the process of flushing it. Thus we must
+ * update the replacement policy data
+ * structures for the size change. The macro
+ * deals with the pinned case.
+ */
+ H5C__UPDATE_RP_FOR_SIZE_CHANGE(cache_ptr, entry_ptr, new_len);
+
+ /* as we haven't updated the cache data structures for
+ * for the flush or flush destroy yet, the entry should
+ * be in the slist. Thus update it for the size change.
+ */
+ HDassert(entry_ptr->in_slist);
+ H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, \
+ new_len);
+
+ /* if defined, update *entry_size_change_ptr for the
+ * change in entry size.
+ */
+ if ( entry_size_change_ptr != NULL )
+ {
+ *entry_size_change_ptr = (int64_t)new_len;
+ *entry_size_change_ptr -= (int64_t)(entry_ptr->size);
+ }
+
+ /* finally, update the entry for its new size */
+ entry_ptr->size = new_len;
+ } /* end if */
+
+ /* If required, udate the entry and the cache data structures
+ * for a move
+ */
+ if(serialize_flags & H5C__SERIALIZE_MOVED_FLAG) {
+#if H5C_DO_SANITY_CHECKS
+ int64_t saved_slist_len_increase;
+ int64_t saved_slist_size_increase;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ H5C__UPDATE_STATS_FOR_MOVE(cache_ptr, entry_ptr);
+
+ if ( entry_ptr->addr == old_addr ) {
+ /* we must update cache data structures for the
+ * change in address.
+ */
+
+ /* delete the entry from the hash table and the slist */
+ H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr);
+ H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr);
+
+ /* update the entry for its new address */
+ entry_ptr->addr = new_addr;
+
+ /* and then reinsert in the index and slist */
+ H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL);
+
+#if H5C_DO_SANITY_CHECKS
+ /* save cache_ptr->slist_len_increase and
+ * cache_ptr->slist_size_increase before the
+ * reinsertion into the slist, and restore
+ * them afterwards to avoid skewing our sanity
+ * checking.
+ */
+ saved_slist_len_increase = cache_ptr->slist_len_increase;
+ saved_slist_size_increase = cache_ptr->slist_size_increase;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+ H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL);
+
+#if H5C_DO_SANITY_CHECKS
+ cache_ptr->slist_len_increase = saved_slist_len_increase;
+ cache_ptr->slist_size_increase = saved_slist_size_increase;
+#endif /* H5C_DO_SANITY_CHECKS */
+ }
+ else {
+ HDassert(entry_ptr->addr == new_addr);
+ }
+ } /* end if */
+
+ if ( serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG ) {
+ /* just save the new compressed entry size in
+ * entry_ptr->compressed_size. We don't need to
+ * do more, as compressed size is only used for I/O.
+ */
+ HDassert(entry_ptr->compressed);
+ entry_ptr->compressed_size = new_compressed_len;
+ }
+ } /* end if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) */
+
+ /* Serialize object into buffer */
+ {
+ size_t image_len;
+
+ if ( entry_ptr->compressed )
+ image_len = entry_ptr->compressed_size;
+ else
+ image_len = entry_ptr->size;
+
+ /* reset cache_ptr->slist_changed so we can detect slist
+ * modifications in the serialize call.
+ */
+ cache_ptr->slist_changed = FALSE;
+
+
+ if ( entry_ptr->type->serialize(f, entry_ptr->image_ptr,
+ image_len,
+ (void *)entry_ptr) < 0) {
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+ "unable to serialize entry");
+ }
+
+ /* set cache_ptr->slist_change_in_serialize if the
+ * slist was modified.
+ */
+ if ( cache_ptr->slist_changed )
+ cache_ptr->slist_change_in_pre_serialize = TRUE;
+
+#if H5C_DO_MEMORY_SANITY_CHECKS
+
+ HDassert(0 == HDmemcmp(((uint8_t *)entry_ptr->image_ptr) +
+ image_len,
+ H5C_IMAGE_SANITY_VALUE,
+ H5C_IMAGE_EXTRA_SPACE));
+
+#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
+
+ entry_ptr->image_up_to_date = TRUE;
+ }
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C__generate_image */
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c
index 6e8d94c..e17aacd 100644
--- a/src/H5Cmpio.c
+++ b/src/H5Cmpio.c
@@ -39,11 +39,14 @@
#include "H5private.h" /* Generic Functions */
#include "H5ACprivate.h" /* Metadata cache */
#include "H5Cpkg.h" /* Cache */
+#include "H5Dprivate.h" /* Datasets */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fpkg.h" /* Files */
+#include "H5FDprivate.h" /* File drivers */
#include "H5Iprivate.h" /* IDs */
#include "H5MMprivate.h" /* Memory management */
-
+#include "H5Pprivate.h" /* Property lists */
+#include "H5SLprivate.h" /* Skip lists */
#ifdef H5_HAVE_PARALLEL
@@ -221,6 +224,7 @@ H5C_apply_candidate_list(H5F_t * f,
H5C_cache_entry_t * entry_ptr = NULL;
H5C_cache_entry_t * flush_ptr = NULL;
H5C_cache_entry_t * delayed_ptr = NULL;
+ H5SL_t * collective_write_list = NULL;
#if H5C_DO_SANITY_CHECKS
haddr_t last_addr;
#endif /* H5C_DO_SANITY_CHECKS */
@@ -253,6 +257,12 @@ H5C_apply_candidate_list(H5F_t * f,
HDfprintf(stdout, "%s", tbl_buf);
#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+ if(f->coll_md_write) {
+ /* Create skip list of entries for collective write */
+ if(NULL == (collective_write_list = H5SL_create(H5SL_TYPE_HADDR, NULL)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCREATE, FAIL, "can't create skip list for entries")
+ }
+
n = num_candidates / mpi_size;
m = num_candidates % mpi_size;
HDassert(n >= 0);
@@ -357,6 +367,16 @@ H5C_apply_candidate_list(H5F_t * f,
entries_to_clear++;
entry_ptr->clear_on_unprotect = TRUE;
} /* end else */
+
+ /* Entries marked as collectively accessed and are in the
+ candidate list to clear from the cache have to be
+ removed from the coll list. This is OK since the
+ candidate list is collective and uniform across all
+ ranks. */
+ if(TRUE == entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ }
} /* end else */
} /* end for */
@@ -433,7 +453,9 @@ H5C_apply_candidate_list(H5F_t * f,
* will not call either the pre_serialize or serialize callbacks.
*/
- if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, clear_ptr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
} /* end if */
@@ -478,7 +500,8 @@ H5C_apply_candidate_list(H5F_t * f,
cache_ptr->entries_removed_counter = 0;
cache_ptr->last_entry_removed_ptr = NULL;
- if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, collective_write_list) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.")
if ( ( cache_ptr->entries_removed_counter > 1 ) ||
@@ -636,7 +659,9 @@ H5C_apply_candidate_list(H5F_t * f,
(long long)clear_ptr->addr);
#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
- if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, clear_ptr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
} /* end else-if */
@@ -652,7 +677,8 @@ H5C_apply_candidate_list(H5F_t * f,
(long long)flush_ptr->addr);
#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
- if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, flush_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, collective_write_list) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
} /* end else-if */
} /* end if */
@@ -686,20 +712,34 @@ H5C_apply_candidate_list(H5F_t * f,
if (delayed_ptr) {
if (delayed_ptr->clear_on_unprotect) {
+ if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG,
+ NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.")
+
entry_ptr->clear_on_unprotect = FALSE;
entries_cleared++;
} else if (delayed_ptr->flush_immediately) {
+ if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, collective_write_list) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.")
+
entry_ptr->flush_immediately = FALSE;
entries_flushed++;
} /* end if */
- if(H5C__flush_single_entry(f, dxpl_id, delayed_ptr, H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry collectively.")
-
entries_flushed_collectively++;
entries_flushed_or_cleared_last++;
} /* end if */
+ if(f->coll_md_write) {
+ HDassert(collective_write_list);
+
+ /* Write collective list */
+ if(H5C_collective_write(f,
+ dxpl_id,
+ collective_write_list) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't write metadata collectively")
+ }
/* ====================================================================== *
* Finished flushing everything. *
* ====================================================================== */
@@ -719,6 +759,10 @@ done:
if(candidate_assignment_table != NULL)
candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table);
+ if(collective_write_list)
+ if(H5SL_destroy(collective_write_list, H5C_collective_write_free, NULL) < 0)
+ HDONE_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "failed to destroy skip list")
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_apply_candidate_list() */
@@ -1082,6 +1126,14 @@ H5C_mark_entries_as_clean(H5F_t * f,
* scan the LRU list shortly, and clear all those entries
* not currently protected.
*/
+
+ /* Make sure first that we clear the collective flag from
+ it so it can be cleared */
+ if(TRUE == entry_ptr->coll_access) {
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ }
+
entry_ptr->clear_on_unprotect = TRUE;
#if H5C_DO_SANITY_CHECKS
if ( entry_ptr->is_protected ) {
@@ -1142,7 +1194,9 @@ H5C_mark_entries_as_clean(H5F_t * f,
entry_ptr = entry_ptr->prev;
entries_cleared++;
- if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0)
+ if(H5C__flush_single_entry(f, dxpl_id, clear_ptr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, NULL) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
} else {
@@ -1170,7 +1224,9 @@ H5C_mark_entries_as_clean(H5F_t * f,
entry_ptr = entry_ptr->next;
entries_cleared++;
- if(H5C__flush_single_entry(f, dxpl_id, clear_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0 )
+ if(H5C__flush_single_entry(f, dxpl_id, clear_ptr,
+ H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG,
+ NULL, NULL) < 0 )
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.")
} else {
@@ -1215,5 +1271,205 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5C_mark_entries_as_clean() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5C_clear_coll_entries
+ *
+ * Purpose: Clear half or the entire list of collective entries and
+ * mark them as independent.
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: Mohamad Chaarawi
+ * April, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5C_clear_coll_entries(H5C_t * cache_ptr, hbool_t partial)
+{
+ int32_t list_len, coll_entries_cleared = 0;
+ H5C_cache_entry_t * entry_ptr = NULL;
+ H5C_cache_entry_t * prev_ptr;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+ entry_ptr = cache_ptr->coll_tail_ptr;
+ list_len = cache_ptr->coll_list_len;
+
+ while(entry_ptr && (coll_entries_cleared < (partial ? list_len/2 : list_len))) {
+ prev_ptr = entry_ptr->coll_prev;
+
+ HDassert(entry_ptr->coll_access);
+
+ entry_ptr->coll_access = FALSE;
+ H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, FAIL)
+ coll_entries_cleared ++;
+
+ entry_ptr = prev_ptr;
+ }
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C_clear_coll_entries */
+
+herr_t
+H5C_collective_write(H5F_t *f,
+ hid_t dxpl_id,
+ H5SL_t *collective_write_list)
+{
+ H5P_genplist_t *plist = NULL;
+ H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE;
+ H5FD_mpio_xfer_t orig_xfer_mode;
+ H5SL_node_t *node;
+ H5C_collective_write_t *item;
+ int count;
+ void *base_buf;
+ int *length_array = NULL;
+ MPI_Aint *buf_array = NULL;
+ MPI_Aint *offset_array = NULL;
+ MPI_Datatype btype;
+ MPI_Datatype ftype;
+ hbool_t btype_created = FALSE;
+ hbool_t ftype_created = FALSE;
+ int mpi_code;
+ int i;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+ count = (int)H5SL_count(collective_write_list);
+
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list")
+
+ if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property")
+
+ if(count > 0) {
+ if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property")
+
+ /* Allocate arrays */
+ if(NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective write table length array")
+ if(NULL == (buf_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective buf table length array")
+ if(NULL == (offset_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for collective offset table length array")
+
+ /* Fill arrays */
+ node = H5SL_first(collective_write_list);
+ HDassert(node);
+ if(NULL == (item = (H5C_collective_write_t *)H5SL_item(node)))
+ HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item")
+
+ length_array[0] = (int)item->length;
+ base_buf = item->buf;
+ buf_array[0] = (MPI_Aint)0;
+ offset_array[0] = (MPI_Aint)item->offset;
+
+ node = H5SL_next(node);
+ i = 1;
+ while(node) {
+ if(NULL == (item = (H5C_collective_write_t *)H5SL_item(node)))
+ HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item")
+
+ length_array[i] = (int)item->length;
+ buf_array[i] = (MPI_Aint)item->buf - (MPI_Aint)base_buf;
+ offset_array[i] = (MPI_Aint)item->offset;
+ node = H5SL_next(node);
+ i++;
+ } /* end while */
+
+ /* Create memory mpi type */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+ btype_created = TRUE;
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+ /* Create file mpi type */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+ ftype_created = TRUE;
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+ /* Pass buf type, file type to the file driver. */
+ if(H5FD_mpi_setup_collective(dxpl_id, &btype, &ftype) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties")
+
+ /* Write data */
+ if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, dxpl_id, base_buf) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively")
+ } /* end if */
+ else {
+ MPI_Status mpi_stat;
+ MPI_File mpi_fh_p;
+ MPI_File mpi_fh;
+
+ if(H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file handle")
+ mpi_fh = *(MPI_File*)mpi_fh_p;
+
+ /* just to match up with the 1st MPI_File_set_view from H5FD_mpio_write() */
+ if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE,
+ MPI_BYTE, "native", MPI_INFO_NULL)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+
+ /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */
+ HDmemset(&mpi_stat, 0, sizeof(MPI_Status));
+ if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0,
+ MPI_BYTE, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
+
+ /* just to match up with the 2nd MPI_File_set_view (reset) in H5FD_mpio_write() */
+ if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE,
+ MPI_BYTE, "native", MPI_INFO_NULL)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+ } /* end else */
+
+done:
+ /* Free arrays */
+ length_array = (int *)H5MM_xfree(length_array);
+ buf_array = (MPI_Aint *)H5MM_xfree(buf_array);
+ offset_array = (MPI_Aint *)H5MM_xfree(offset_array);
+
+ /* Free MPI Types */
+ if(btype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&btype)))
+ HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+ if(ftype_created && MPI_SUCCESS != (mpi_code = MPI_Type_free(&ftype)))
+ HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+
+ /* Reset dxpl */
+ if(orig_xfer_mode != H5FD_MPIO_COLLECTIVE) {
+ HDassert(plist);
+ if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0)
+ HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property")
+ } /* end if */
+
+ FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5C_collective_write() */
+
+herr_t
+H5C_collective_write_free(void *_item, void H5_ATTR_UNUSED *key, void H5_ATTR_UNUSED *op_data)
+{
+ H5C_collective_write_t *item = (H5C_collective_write_t *)_item;
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ HDassert(item);
+
+ if(item->free_buf)
+ item->buf = H5MM_xfree(item->buf);
+ /*!FIXME change to use free list for items */
+ H5MM_free(item);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* end H5C_collective_write_free() */
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h
index 9508b98..2f4d137 100644
--- a/src/H5Cpkg.h
+++ b/src/H5Cpkg.h
@@ -2542,7 +2542,16 @@ if ( ( (cache_ptr)->index_size != \
HDassert( ((entry_ptr)->ro_ref_count) == 0 ); \
HDassert( (entry_ptr)->size > 0 ); \
HDassert( new_size > 0 ); \
- \
+ \
+ if ( (entry_ptr)->coll_access ) { \
+ \
+ H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr)->coll_list_len, \
+ (cache_ptr)->coll_list_size, \
+ (entry_ptr)->size, \
+ (new_size)); \
+ \
+ } \
+ \
if ( (entry_ptr)->is_pinned ) { \
\
H5C__DLL_UPDATE_FOR_SIZE_CHANGE((cache_ptr)->pel_len, \
@@ -2892,6 +2901,247 @@ if ( ( (cache_ptr)->index_size != \
#endif /* H5C_MAINTAIN_CLEAN_AND_DIRTY_LRU_LISTS */
+#ifdef H5_HAVE_PARALLEL
+
+#if H5C_DO_SANITY_CHECKS
+
+#define H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (hd_ptr) == NULL ) || \
+ ( (tail_ptr) == NULL ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (len) <= 0 ) || \
+ ( (Size) < (entry_ptr)->size ) || \
+ ( ( (Size) == (entry_ptr)->size ) && ( ! ( (len) == 1 ) ) ) || \
+ ( ( (entry_ptr)->coll_prev == NULL ) && ( (hd_ptr) != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->coll_next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
+ ( ( (len) == 1 ) && \
+ ( ! ( ( (hd_ptr) == (entry_ptr) ) && ( (tail_ptr) == (entry_ptr) ) && \
+ ( (entry_ptr)->coll_next == NULL ) && \
+ ( (entry_ptr)->coll_prev == NULL ) && \
+ ( (Size) == (entry_ptr)->size ) \
+ ) \
+ ) \
+ ) \
+ ) { \
+ HDassert(0 && "coll DLL pre remove SC failed"); \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "coll DLL pre remove SC failed") \
+}
+
+#define H5C__COLL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
+if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (head_ptr) != (tail_ptr) ) \
+ ) || \
+ ( (len) < 0 ) || \
+ ( (Size) < 0 ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (head_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \
+ ( (head_ptr) == NULL ) || ( (head_ptr)->size != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (head_ptr) == NULL ) || ( (head_ptr)->coll_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->coll_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HDassert(0 && "COLL DLL sanity check failed"); \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "COLL DLL sanity check failed") \
+}
+
+#define H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->coll_next != NULL ) || \
+ ( (entry_ptr)->coll_prev != NULL ) || \
+ ( ( ( (hd_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (hd_ptr) != (tail_ptr) ) \
+ ) || \
+ ( (len) < 0 ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (hd_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \
+ ( (hd_ptr) == NULL ) || ( (hd_ptr)->size != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (hd_ptr) == NULL ) || ( (hd_ptr)->coll_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->coll_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HDassert(0 && "COLL DLL pre insert SC failed"); \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "COLL DLL pre insert SC failed") \
+}
+
+#else /* H5C_DO_SANITY_CHECKS */
+
+#define H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+#define H5C__COLL_DLL_SC(head_ptr, tail_ptr, len, Size, fv)
+#define H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+
+#endif /* H5C_DO_SANITY_CHECKS */
+
+
+#define H5C__COLL_DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+{ \
+ H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ if ( (head_ptr) == NULL ) \
+ { \
+ (head_ptr) = (entry_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ else \
+ { \
+ (tail_ptr)->coll_next = (entry_ptr); \
+ (entry_ptr)->coll_prev = (tail_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ (len)++; \
+ (Size) += entry_ptr->size; \
+} /* H5C__COLL_DLL_APPEND() */
+
+#define H5C__COLL_DLL_PREPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+{ \
+ H5C__COLL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)\
+ if ( (head_ptr) == NULL ) \
+ { \
+ (head_ptr) = (entry_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ else \
+ { \
+ (head_ptr)->coll_prev = (entry_ptr); \
+ (entry_ptr)->coll_next = (head_ptr); \
+ (head_ptr) = (entry_ptr); \
+ } \
+ (len)++; \
+ (Size) += entry_ptr->size; \
+} /* H5C__COLL_DLL_PREPEND() */
+
+#define H5C__COLL_DLL_REMOVE(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+{ \
+ H5C__COLL_DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)\
+ { \
+ if ( (head_ptr) == (entry_ptr) ) \
+ { \
+ (head_ptr) = (entry_ptr)->coll_next; \
+ if ( (head_ptr) != NULL ) \
+ (head_ptr)->coll_prev = NULL; \
+ } \
+ else \
+ { \
+ (entry_ptr)->coll_prev->coll_next = (entry_ptr)->coll_next; \
+ } \
+ if ( (tail_ptr) == (entry_ptr) ) \
+ { \
+ (tail_ptr) = (entry_ptr)->coll_prev; \
+ if ( (tail_ptr) != NULL ) \
+ (tail_ptr)->coll_next = NULL; \
+ } \
+ else \
+ (entry_ptr)->coll_next->coll_prev = (entry_ptr)->coll_prev; \
+ entry_ptr->coll_next = NULL; \
+ entry_ptr->coll_prev = NULL; \
+ (len)--; \
+ (Size) -= entry_ptr->size; \
+ } \
+} /* H5C__COLL_DLL_REMOVE() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5C__INSERT_IN_COLL_LIST
+ *
+ * Purpose: Insert entry into collective entries list
+ *
+ * Return: N/A
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (cache_ptr) ); \
+ HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ \
+ /* insert the entry at the head of the list. */ \
+ \
+ H5C__COLL_DLL_PREPEND((entry_ptr), (cache_ptr)->coll_head_ptr, \
+ (cache_ptr)->coll_tail_ptr, \
+ (cache_ptr)->coll_list_len, \
+ (cache_ptr)->coll_list_size, \
+ (fail_val)) \
+ \
+} /* H5C__INSERT_IN_COLL_LIST */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5C__REMOVE_FROM_COLL_LIST
+ *
+ * Purpose: Remove entry from collective entries list
+ *
+ * Return: N/A
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5C__REMOVE_FROM_COLL_LIST(cache_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (cache_ptr) ); \
+ HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ \
+ /* remove the entry from the list. */ \
+ \
+ H5C__COLL_DLL_REMOVE((entry_ptr), (cache_ptr)->coll_head_ptr, \
+ (cache_ptr)->coll_tail_ptr, \
+ (cache_ptr)->coll_list_len, \
+ (cache_ptr)->coll_list_size, \
+ (fail_val)) \
+ \
+} /* H5C__REMOVE_FROM_COLL_LIST */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5C__MOVE_TO_TOP_IN_COLL_LIST
+ *
+ * Purpose: Update entry position in collective entries list
+ *
+ * Return: N/A
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5C__MOVE_TO_TOP_IN_COLL_LIST(cache_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (cache_ptr) ); \
+ HDassert( (cache_ptr)->magic == H5C__H5C_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ \
+ /* Remove entry and insert at the head of the list. */ \
+ H5C__COLL_DLL_REMOVE((entry_ptr), (cache_ptr)->coll_head_ptr, \
+ (cache_ptr)->coll_tail_ptr, \
+ (cache_ptr)->coll_list_len, \
+ (cache_ptr)->coll_list_size, \
+ (fail_val)) \
+ \
+ H5C__COLL_DLL_PREPEND((entry_ptr), (cache_ptr)->coll_head_ptr, \
+ (cache_ptr)->coll_tail_ptr, \
+ (cache_ptr)->coll_list_len, \
+ (cache_ptr)->coll_list_size, \
+ (fail_val)) \
+ \
+} /* H5C__MOVE_TO_TOP_IN_COLL_LIST */
+#endif /* H5_HAVE_PARALLEL */
+
/****************************/
/* Package Private Typedefs */
@@ -3885,6 +4135,13 @@ struct H5C_t {
H5C_cache_entry_t * dLRU_head_ptr;
H5C_cache_entry_t * dLRU_tail_ptr;
+#ifdef H5_HAVE_PARALLEL
+ int32_t coll_list_len;
+ size_t coll_list_size;
+ H5C_cache_entry_t * coll_head_ptr;
+ H5C_cache_entry_t * coll_tail_ptr;
+#endif /* H5_HAVE_PARALLEL */
+
/* Fields for automatic cache size adjustment */
hbool_t size_increase_possible;
hbool_t flash_size_increase_possible;
@@ -3988,6 +4245,15 @@ struct H5C_t {
#endif /* NDEBUG */
};
+#ifdef H5_HAVE_PARALLEL
+typedef struct H5C_collective_write_t {
+ size_t length;
+ hbool_t free_buf;
+ void *buf;
+ haddr_t offset;
+} H5C_collective_write_t;
+#endif /* H5_HAVE_PARALLEL */
+
/*****************************/
/* Package Private Variables */
/*****************************/
@@ -3997,7 +4263,11 @@ struct H5C_t {
/* Package Private Prototypes */
/******************************/
H5_DLL herr_t H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id,
- H5C_cache_entry_t *entry_ptr, unsigned flags, int64_t *entry_size_change_ptr);
+ H5C_cache_entry_t *entry_ptr, unsigned flags, int64_t *entry_size_change_ptr, H5SL_t *collective_write_list);
+#ifdef H5_HAVE_PARALLEL
+H5_DLL herr_t H5C_collective_write(H5F_t *f, hid_t dxpl_id, H5SL_t *collective_write_list);
+H5_DLL herr_t H5C_collective_write_free(void *_item, void *key, void *op_data);
+#endif /* H5_HAVE_PARALLEL */
#endif /* _H5Cpkg_H */
diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h
index da9e6f5..5ad026b 100644
--- a/src/H5Cprivate.h
+++ b/src/H5Cprivate.h
@@ -1608,6 +1608,8 @@ typedef struct H5C_cache_entry_t {
#ifdef H5_HAVE_PARALLEL
hbool_t clear_on_unprotect;
hbool_t flush_immediately;
+ hbool_t coll_access;
+ hbool_t ind_access_while_coll;
#endif /* H5_HAVE_PARALLEL */
hbool_t flush_in_progress;
hbool_t destroy_in_progress;
@@ -1631,6 +1633,8 @@ typedef struct H5C_cache_entry_t {
struct H5C_cache_entry_t * prev;
struct H5C_cache_entry_t * aux_next;
struct H5C_cache_entry_t * aux_prev;
+ struct H5C_cache_entry_t * coll_next;
+ struct H5C_cache_entry_t * coll_prev;
#if H5C_COLLECT_CACHE_ENTRY_STATS
/* cache entry stats fields */
@@ -1996,6 +2000,7 @@ H5_DLL herr_t H5C_apply_candidate_list(H5F_t *f, hid_t dxpl_id,
int mpi_rank, int mpi_size);
H5_DLL herr_t H5C_construct_candidate_list__clean_cache(H5C_t *cache_ptr);
H5_DLL herr_t H5C_construct_candidate_list__min_clean(H5C_t *cache_ptr);
+H5_DLL herr_t H5C_clear_coll_entries(H5C_t * cache_ptr, hbool_t partial);
H5_DLL herr_t H5C_mark_entries_as_clean(H5F_t *f, hid_t dxpl_id, int32_t ce_array_len,
haddr_t *ce_array_ptr);
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5D.c b/src/H5D.c
index bcc2888..4dccd0d 100644
--- a/src/H5D.c
+++ b/src/H5D.c
@@ -910,7 +910,7 @@ H5Dset_extent(hid_t dset_id, const hsize_t size[])
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no size specified")
/* Private function */
- if(H5D__set_extent(dset, size, H5AC_dxpl_id) < 0)
+ if(H5D__set_extent(dset, size, H5AC_coll_read_dxpl_id) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set extend dataset")
done:
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index b0abdf1..8194cf4 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -45,7 +45,7 @@
/****************/
#include "H5Dmodule.h" /* This source code file is part of the H5D module */
-
+#define H5F_FRIEND /*suppress error about including H5Fpkg */
/***********/
/* Headers */
@@ -56,6 +56,7 @@
#endif /* H5_HAVE_PARALLEL */
#include "H5Dpkg.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
+#include "H5Fpkg.h" /* File functions */
#include "H5FLprivate.h" /* Free Lists */
#include "H5Iprivate.h" /* IDs */
#include "H5MMprivate.h" /* Memory management */
@@ -2633,6 +2634,9 @@ H5D__chunk_lookup(const H5D_t *dset, hid_t dxpl_id, const hsize_t *scaled,
/* Check for cached information */
if(!H5D__chunk_cinfo_cache_found(&dset->shared->cache.chunk.last, udata)) {
H5D_chk_idx_info_t idx_info; /* Chunked index info */
+#ifdef H5_HAVE_PARALLEL
+ H5P_coll_md_read_flag_t temp_flag; /* temp flag to hold the coll metadata read setting */
+#endif /* H5_HAVE_PARALLEL */
/* Compose chunked index info struct */
idx_info.f = dset->oloc.file;
@@ -2641,10 +2645,27 @@ H5D__chunk_lookup(const H5D_t *dset, hid_t dxpl_id, const hsize_t *scaled,
idx_info.layout = &dset->shared->layout.u.chunk;
idx_info.storage = &dset->shared->layout.storage.u.chunk;
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) {
+ /* disable collective metadata read for chunk indexes
+ as it is highly unlikely that users would read the
+ same chunks from all processes. MSC - might turn on
+ for root node? */
+ temp_flag = idx_info.f->coll_md_read;
+ idx_info.f->coll_md_read = H5P_FORCE_FALSE;
+ }
+#endif /* H5_HAVE_PARALLEL */
+
/* Go get the chunk information */
if((dset->shared->layout.storage.u.chunk.ops->get_addr)(&idx_info, udata) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't query chunk address")
+#ifdef H5_HAVE_PARALLEL
+ if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) {
+ idx_info.f->coll_md_read = temp_flag;
+ }
+#endif /* H5_HAVE_PARALLEL */
+
/* Cache the information retrieved */
H5D__chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata);
} /* end if */
diff --git a/src/H5Ddeprec.c b/src/H5Ddeprec.c
index a4eb67f..19b9f36 100644
--- a/src/H5Ddeprec.c
+++ b/src/H5Ddeprec.c
@@ -141,7 +141,8 @@ H5Dcreate1(hid_t loc_id, const char *name, hid_t type_id, hid_t space_id,
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not dataset create property list ID")
/* Build and open the new dataset */
- if(NULL == (dset = H5D__create_named(&loc, name, type_id, space, H5P_LINK_CREATE_DEFAULT, dcpl_id, H5P_DATASET_ACCESS_DEFAULT, H5AC_dxpl_id)))
+ if(NULL == (dset = H5D__create_named(&loc, name, type_id, space, H5P_LINK_CREATE_DEFAULT,
+ dcpl_id, H5P_DATASET_ACCESS_DEFAULT, H5AC_dxpl_id)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create dataset")
/* Register the new dataset to get an ID for it */
diff --git a/src/H5F.c b/src/H5F.c
index 6c34e40..0cbd212 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -472,6 +472,7 @@ H5Fcreate(const char *filename, unsigned flags, hid_t fcpl_id, hid_t fapl_id)
if(TRUE != H5P_isa_class(fcpl_id, H5P_FILE_CREATE))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not file create property list")
+
/* Verify access property list and get correct dxpl */
if(H5P_verify_apl_and_dxpl(&fapl_id, H5P_CLS_FACC, &dxpl_id, H5I_INVALID_HID, TRUE) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "can't set access and transfer property lists")
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index bbd3eca..ed70e20 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1696,6 +1696,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
int size_i;
hbool_t use_view_this_time = FALSE;
H5P_genplist_t *plist = NULL; /* Property list pointer */
+ H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
@@ -1726,57 +1727,42 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n", (long)mpi_off, size_i);
#endif
- if(type == H5FD_MEM_DRAW) {
- H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */
+ /* Obtain the data transfer properties */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list")
- /* Obtain the data transfer properties */
- if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list")
+ /* get the transfer mode from the dxpl */
+ if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode)<0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
- /* get the transfer mode from the dxpl */
- if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
+ /*
+ * Set up for a fancy xfer using complex types, or single byte block. We
+ * wouldn't need to rely on the use_view field if MPI semantics allowed
+ * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
+ * could mean "use MPI_BYTE" by convention).
+ */
+ if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ MPI_Datatype file_type;
- /*
- * Set up for a fancy xfer using complex types, or single byte block. We
- * wouldn't need to rely on the use_view field if MPI semantics allowed
- * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
- * could mean "use MPI_BYTE" by convention).
- */
- if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
- MPI_Datatype file_type;
+ /* Remember that views are used */
+ use_view_this_time = TRUE;
- /* Remember that views are used */
- use_view_this_time = TRUE;
+ /* prepare for a full-blown xfer using btype, ftype, and disp */
+ if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
+ if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
- /* prepare for a full-blown xfer using btype, ftype, and disp */
- if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
- if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
-
- /*
- * Set the file view when we are using MPI derived types
- */
- if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+ /*
+ * Set the file view when we are using MPI derived types
+ */
+ if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
- /* When using types, use the address as the displacement for
- * MPI_File_set_view and reset the address for the read to zero
- */
- mpi_off = 0;
- } /* end if */
- } /* end if */
- else {
-#if 0 /* JRM -- 3/23/10 */ /* this is no longer always the case */
- /* Only one process can do the actual metadata write */
- if(file->mpi_rank != H5_PAR_META_WRITE)
-#ifdef LATER
- HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "can't write metadata from non-zero rank")
-#else /* LATER */
- HGOTO_DONE(SUCCEED) /* skip the actual write */
-#endif /* LATER */
-#endif /* JRM */
+ /* When using types, use the address as the displacement for
+ * MPI_File_set_view and reset the address for the read to zero
+ */
+ mpi_off = 0;
} /* end if */
/* Write the data. */
@@ -1803,6 +1789,8 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
} /* end if */
else {
+ if(type != H5FD_MEM_DRAW)
+ HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "Metadata Coll opt property should be collective at this point")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n");
diff --git a/src/H5Fint.c b/src/H5Fint.c
index 14f5456..ea6c9c1 100644
--- a/src/H5Fint.c
+++ b/src/H5Fint.c
@@ -172,6 +172,10 @@ H5F_get_access_plist(H5F_t *f, hbool_t app_ref)
efc_size = H5F_efc_max_nfiles(f->shared->efc);
if(H5P_set(new_plist, H5F_ACS_EFC_SIZE_NAME, &efc_size) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set elink file cache size")
+#ifdef H5_HAVE_PARALLEL
+ if(H5P_set(new_plist, H5_COLL_MD_READ_FLAG_NAME, &(f->coll_md_read)) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't set collective metadata read flag")
+#endif /* H5_HAVE_PARALLEL */
/* Prepare the driver property */
driver_prop.driver_id = f->shared->lf->driver_id;
@@ -637,6 +641,12 @@ H5F_new(H5F_file_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t
if(efc_size > 0)
if(NULL == (f->shared->efc = H5F_efc_create(efc_size)))
HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "can't create external file cache")
+#ifdef H5_HAVE_PARALLEL
+ if(H5P_get(plist, H5_COLL_MD_READ_FLAG_NAME, &(f->coll_md_read)) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag")
+ if(H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->coll_md_write)) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag")
+#endif /* H5_HAVE_PARALLEL */
/* Get the VFD values to cache */
f->shared->maxaddr = H5FD_get_maxaddr(lf);
diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c
index 9783947..5434aa5 100644
--- a/src/H5Fmpi.c
+++ b/src/H5Fmpi.c
@@ -77,6 +77,35 @@
#ifdef H5_HAVE_PARALLEL
+
+/*-------------------------------------------------------------------------
+ * Function: H5F_get_mpi_handle
+ *
+ * Purpose: Retrieves MPI File handle.
+ *
+ * Return: Success: The size (positive)
+ * Failure: Negative
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5F_get_mpi_handle(const H5F_t *f, MPI_File **f_handle)
+{
+ herr_t ret_value = SUCCEED;
+ hid_t fapl = -1;
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ assert(f && f->shared);
+
+ /* Dispatch to driver */
+ if ((ret_value = H5FD_get_vfd_handle(f->shared->lf, fapl, (void **)f_handle)) < 0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file handle")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5F_get_mpi_handle() */
+
/*-------------------------------------------------------------------------
* Function: H5F_mpi_get_rank
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index c62858c..be324d0 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -311,6 +311,10 @@ struct H5F_t {
hbool_t closing; /* File is in the process of being closed */
struct H5F_t *parent; /* Parent file that this file is mounted to */
unsigned nmounts; /* Number of children mounted to this file */
+#ifdef H5_HAVE_PARALLEL
+ H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */
+ hbool_t coll_md_write; /* Do all metadata writes collectively */
+#endif /* H5_HAVE_PARALLEL */
};
diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h
index dd2877b..7decaed 100644
--- a/src/H5Fprivate.h
+++ b/src/H5Fprivate.h
@@ -454,6 +454,7 @@
#define H5F_ACS_FILE_IMAGE_INFO_NAME "file_image_info" /* struct containing initial file image and callback info */
#define H5F_ACS_CORE_WRITE_TRACKING_FLAG_NAME "core_write_tracking_flag" /* Whether or not core VFD backing store write tracking is enabled */
#define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_NAME "core_write_tracking_page_size" /* The page size in kiB when core VFD write tracking is enabled */
+#define H5F_ACS_COLL_MD_WRITE_FLAG_NAME "collective_metadata_write" /* property indicating whether metadata writes are done collectively or not */
/* ======================== File Mount properties ====================*/
#define H5F_MNT_SYM_LOCAL_NAME "local" /* Whether absolute symlinks local to file. */
@@ -676,6 +677,7 @@ H5_DLL herr_t H5F_super_dirty(H5F_t *f);
/* Parallel I/O (i.e. MPI) related routines */
#ifdef H5_HAVE_PARALLEL
+H5_DLL herr_t H5F_get_mpi_handle(const H5F_t *f, MPI_File **f_handle);
H5_DLL int H5F_mpi_get_rank(const H5F_t *f);
H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f);
H5_DLL int H5F_mpi_get_size(const H5F_t *f);
diff --git a/src/H5Pdxpl.c b/src/H5Pdxpl.c
index d56a52a..1181c16 100644
--- a/src/H5Pdxpl.c
+++ b/src/H5Pdxpl.c
@@ -176,6 +176,12 @@
#define H5AC_XFER_RING_ENC H5P__encode_unsigned
#define H5AC_XFER_RING_DEC H5P__decode_unsigned
+/* Definition for reading metadata collectively */
+#define H5D_XFER_COLL_MD_READ_SIZE sizeof(H5P_coll_md_read_flag_t)
+#define H5D_XFER_COLL_MD_READ_DEF H5P_USER_FALSE
+#define H5D_XFER_COLL_MD_READ_ENC H5P__encode_coll_md_read_flag_t
+#define H5D_XFER_COLL_MD_READ_DEC H5P__decode_coll_md_read_flag_t
+
/******************/
/* Local Typedefs */
/******************/
@@ -281,6 +287,7 @@ static const hbool_t H5D_def_direct_chunk_flag_g = H5D_XFER_DIRECT_CHUNK_WRITE_F
static const uint32_t H5D_def_direct_chunk_filters_g = H5D_XFER_DIRECT_CHUNK_WRITE_FILTERS_DEF; /* Default value for the filters of direct chunk write */
static const hsize_t *H5D_def_direct_chunk_offset_g = H5D_XFER_DIRECT_CHUNK_WRITE_OFFSET_DEF; /* Default value for the offset of direct chunk write */
static const uint32_t H5D_def_direct_chunk_datasize_g = H5D_XFER_DIRECT_CHUNK_WRITE_DATASIZE_DEF; /* Default value for the datasize of direct chunk write */
+static const H5P_coll_md_read_flag_t H5D_def_coll_md_read_g = H5D_XFER_COLL_MD_READ_DEF; /* Default setting for the collective metedata read flag */
static const H5AC_ring_t H5D_ring_g = H5AC_XFER_RING_DEF; /* Default value for the cache entry ring type */
@@ -476,7 +483,14 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
- /* Register the data transform property */
+ /* Register the metadata collective read flag */
+ if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5D_XFER_COLL_MD_READ_SIZE,
+ &H5D_def_coll_md_read_g,
+ NULL, NULL, NULL, H5D_XFER_COLL_MD_READ_ENC, H5D_XFER_COLL_MD_READ_DEC,
+ NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
+ /* Register the ring property (private) */
if(H5P_register_real(pclass, H5AC_RING_NAME, H5AC_XFER_RING_SIZE, &H5D_ring_g,
NULL, NULL, NULL, H5AC_XFER_RING_ENC, H5AC_XFER_RING_DEC,
NULL, NULL, NULL, NULL) < 0)
diff --git a/src/H5Pfapl.c b/src/H5Pfapl.c
index b92d3e9..75f9e7c 100644
--- a/src/H5Pfapl.c
+++ b/src/H5Pfapl.c
@@ -178,6 +178,17 @@
#define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEF 524288
#define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_ENC H5P__encode_size_t
#define H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEC H5P__decode_size_t
+/* Definition of collective metadata read mode flag */
+#define H5F_ACS_COLL_MD_READ_FLAG_SIZE sizeof(H5P_coll_md_read_flag_t)
+#define H5F_ACS_COLL_MD_READ_FLAG_DEF H5P_USER_FALSE
+#define H5F_ACS_COLL_MD_READ_FLAG_ENC H5P__encode_coll_md_read_flag_t
+#define H5F_ACS_COLL_MD_READ_FLAG_DEC H5P__decode_coll_md_read_flag_t
+/* Definition of collective metadata write mode flag */
+#define H5F_ACS_COLL_MD_WRITE_FLAG_SIZE sizeof(hbool_t)
+#define H5F_ACS_COLL_MD_WRITE_FLAG_DEF FALSE
+#define H5F_ACS_COLL_MD_WRITE_FLAG_ENC H5P__encode_hbool_t
+#define H5F_ACS_COLL_MD_WRITE_FLAG_DEC H5P__decode_hbool_t
+
/******************/
/* Local Typedefs */
@@ -280,6 +291,8 @@ static const unsigned H5F_def_efc_size_g = H5F_ACS_EFC_SIZE_DEF;
static const H5FD_file_image_info_t H5F_def_file_image_info_g = H5F_ACS_FILE_IMAGE_INFO_DEF; /* Default file image info and callbacks */
static const hbool_t H5F_def_core_write_tracking_flag_g = H5F_ACS_CORE_WRITE_TRACKING_FLAG_DEF; /* Default setting for core VFD write tracking */
static const size_t H5F_def_core_write_tracking_page_size_g = H5F_ACS_CORE_WRITE_TRACKING_PAGE_SIZE_DEF; /* Default core VFD write tracking page size */
+static const H5P_coll_md_read_flag_t H5F_def_coll_md_read_flag_g = H5F_ACS_COLL_MD_READ_FLAG_DEF; /* Default setting for the collective metedata read flag */
+static const hbool_t H5F_def_coll_md_write_flag_g = H5F_ACS_COLL_MD_WRITE_FLAG_DEF; /* Default setting for the collective metedata write flag */
@@ -437,6 +450,18 @@ H5P__facc_reg_prop(H5P_genclass_t *pclass)
NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+ /* Register the metadata collective read flag */
+ if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5F_ACS_COLL_MD_READ_FLAG_SIZE, &H5F_def_coll_md_read_flag_g,
+ NULL, NULL, NULL, H5F_ACS_COLL_MD_READ_FLAG_ENC, H5F_ACS_COLL_MD_READ_FLAG_DEC,
+ NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
+ /* Register the metadata collective write flag */
+ if(H5P_register_real(pclass, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, H5F_ACS_COLL_MD_WRITE_FLAG_SIZE, &H5F_def_coll_md_write_flag_g,
+ NULL, NULL, NULL, H5F_ACS_COLL_MD_WRITE_FLAG_ENC, H5F_ACS_COLL_MD_WRITE_FLAG_DEC,
+ NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5P__facc_reg_prop() */
@@ -3481,3 +3506,270 @@ done:
FUNC_LEAVE_API(ret_value)
} /* end H5Pget_core_write_tracking() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5P__encode_coll_md_read_flag_t
+ *
+ * Purpose: Generic encoding callback routine for 'coll_md_read_flag' properties.
+ *
+ * Return: Success: Non-negative
+ * Failure: Negative
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5P__encode_coll_md_read_flag_t(const void *value, void **_pp, size_t *size)
+{
+ const H5P_coll_md_read_flag_t *coll_md_read_flag = (const H5P_coll_md_read_flag_t *)value;
+ uint8_t **pp = (uint8_t **)_pp;
+
+ FUNC_ENTER_PACKAGE_NOERR
+
+ /* Sanity checks */
+ HDassert(coll_md_read_flag);
+ HDassert(size);
+
+ if(NULL != *pp) {
+ /* Encode the value */
+ HDmemcpy(*pp, coll_md_read_flag, sizeof(H5P_coll_md_read_flag_t));
+ *pp += sizeof(H5P_coll_md_read_flag_t);
+ } /* end if */
+
+ /* Set size needed for encoding */
+ *size += sizeof(H5P_coll_md_read_flag_t);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* end H5P__encode_coll_md_read_flag_t() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5P__decode_coll_md_read_flag_t
+ *
+ * Purpose: Generic decoding callback routine for 'coll_md_read_flag' properties.
+ *
+ * Return: Success: Non-negative
+ * Failure: Negative
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5P__decode_coll_md_read_flag_t(const void **_pp, void *_value)
+{
+ H5P_coll_md_read_flag_t *coll_md_read_flag = (H5P_coll_md_read_flag_t *)_value; /* File close degree */
+ const uint8_t **pp = (const uint8_t **)_pp;
+
+ FUNC_ENTER_STATIC_NOERR
+
+ /* Sanity checks */
+ HDassert(pp);
+ HDassert(*pp);
+ HDassert(coll_md_read_flag);
+
+ /* Decode file close degree */
+ *coll_md_read_flag = (H5P_coll_md_read_flag_t)*(*pp);
+ *pp += sizeof(H5P_coll_md_read_flag_t);
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* end H5P__decode_coll_md_read_flag_t() */
+
+#ifdef H5_HAVE_PARALLEL
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_coll_metadata_read
+ *
+ * Purpose: Tell the library whether the metadata read operations will
+ * be done collectively (1) or not (0). Default is independent.
+ * With collective mode, the library will optimize access to
+ * metadata operations on the file.
+ *
+ * Note: This routine accepts file access property lists, link
+ * access property lists, attribute access property lists,
+ * dataset access property lists, group access property lists,
+ * named datatype access property lists,
+ * and dataset transfer property lists.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_coll_metadata_read(hid_t plist_id, hbool_t is_collective)
+{
+ H5P_genplist_t *plist; /* Property list pointer */
+ H5P_coll_md_read_flag_t coll_meta_read; /* Property value */
+ herr_t ret_value = SUCCEED; /* return value */
+
+ FUNC_ENTER_API(FAIL)
+ H5TRACE2("e", "ib", plist_id, is_collective);
+
+ /* Compare the property list's class against the other class */
+ /* (Dataset, group, attribute, and named datype access property lists
+ * are sub-classes of link access property lists -QAK)
+ */
+ if(TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) &&
+ TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS) &&
+ TRUE != H5P_isa_class(plist_id, H5P_DATASET_XFER))
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist")
+
+ /* set property to either TRUE if > 0, or FALSE otherwise */
+ if(is_collective)
+ coll_meta_read = H5P_USER_TRUE;
+ else
+ coll_meta_read = H5P_USER_FALSE;
+
+ /* Get the plist structure */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ /* Set values */
+ if(H5P_set(plist, H5_COLL_MD_READ_FLAG_NAME, &coll_meta_read) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata read flag")
+
+done:
+ FUNC_LEAVE_API(ret_value)
+} /* end H5Pset_coll_metadata_read() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_coll_metadata_read
+ *
+ * Purpose: Gets information about collective metadata read mode.
+ *
+ * Note: This routine accepts file access property lists, link
+ * access property lists, attribute access property lists,
+ * dataset access property lists, group access property lists,
+ * named datatype access property lists,
+ * and dataset transfer property lists.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_coll_metadata_read(hid_t plist_id, hbool_t *is_collective)
+{
+ herr_t ret_value = SUCCEED; /* return value */
+
+ FUNC_ENTER_API(FAIL)
+ H5TRACE2("e", "i*b", plist_id, is_collective);
+
+ /* Compare the property list's class against the other class */
+ /* (Dataset, group, attribute, and named datype access property lists
+ * are sub-classes of link access property lists -QAK)
+ */
+ if(TRUE != H5P_isa_class(plist_id, H5P_LINK_ACCESS) &&
+ TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS) &&
+ TRUE != H5P_isa_class(plist_id, H5P_DATASET_XFER))
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist")
+
+ /* Get value */
+ if(is_collective) {
+ H5P_coll_md_read_flag_t internal_flag; /* property setting. we need to convert to either TRUE or FALSE */
+ H5P_genplist_t *plist; /* Property list pointer */
+
+ /* Get the plist structure */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ if(H5P_get(plist, H5_COLL_MD_READ_FLAG_NAME, &internal_flag) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get core collective metadata read flag")
+
+ if(internal_flag < 0)
+ *is_collective = FALSE;
+ else
+ *is_collective = (hbool_t)internal_flag;
+ } /* end if */
+
+done:
+ FUNC_LEAVE_API(ret_value)
+} /* H5Pget_coll_metadata_read */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_coll_metadata_write
+ *
+ * Purpose: Tell the library whether the metadata write operations will
+ * be done collectively (1) or not (0). Default is collective.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_coll_metadata_write(hid_t plist_id, hbool_t is_collective)
+{
+ H5P_genplist_t *plist; /* Property list pointer */
+ herr_t ret_value = SUCCEED; /* return value */
+
+ FUNC_ENTER_API(FAIL)
+ H5TRACE2("e", "ib", plist_id, is_collective);
+
+ /* Compare the property list's class against the other class */
+ if(TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS))
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not a file access plist")
+
+ /* Get the plist structure */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ /* Set values */
+ if(H5P_set(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &is_collective) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set collective metadata write flag")
+
+done:
+ FUNC_LEAVE_API(ret_value)
+} /* end H5Pset_coll_metadata_write() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_coll_metadata_write
+ *
+ * Purpose: Gets information about collective metadata write mode.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Mohamad Chaarawi
+ * Sunday, June 21, 2015
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_coll_metadata_write(hid_t plist_id, hbool_t *is_collective)
+{
+ H5P_genplist_t *plist; /* Property list pointer */
+ herr_t ret_value = SUCCEED; /* return value */
+
+ FUNC_ENTER_API(FAIL)
+ H5TRACE2("e", "i*b", plist_id, is_collective);
+
+ /* Compare the property list's class against the other class */
+ if(TRUE != H5P_isa_class(plist_id, H5P_FILE_ACCESS))
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, FAIL, "property list is not an access plist")
+
+ /* Get the plist structure */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(plist_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ if(H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, is_collective) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get collective metadata write flag")
+
+done:
+ FUNC_LEAVE_API(ret_value)
+} /* end H5Pget_coll_metadata_write() */
+#endif /* H5_HAVE_PARALLEL */
+
diff --git a/src/H5Pint.c b/src/H5Pint.c
index e16379a..4064b66 100644
--- a/src/H5Pint.c
+++ b/src/H5Pint.c
@@ -29,6 +29,7 @@
/* Headers */
/***********/
#include "H5private.h" /* Generic Functions */
+#include "H5ACprivate.h" /* Metadata cache */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fprivate.h" /* File access */
#include "H5FLprivate.h" /* Free lists */
@@ -5450,10 +5451,9 @@ H5P_get_class(const H5P_genplist_t *plist)
*-------------------------------------------------------------------------
*/
herr_t
-H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass,
- hid_t *dxpl_id, hid_t loc_id, hbool_t is_collective)
+H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass, hid_t *dxpl_id,
+ hid_t loc_id, hbool_t is_collective)
{
- MPI_Comm mpi_comm;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT
@@ -5467,9 +5467,30 @@ H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass,
if(H5P_DEFAULT == *acspl_id)
*acspl_id = *libclass->def_plist_id;
else {
+#ifdef H5_HAVE_PARALLEL
+ H5P_coll_md_read_flag_t is_collective; /* Collective metadata read flag */
+ H5P_genplist_t *plist; /* Property list pointer */
+#endif /* H5_HAVE_PARALLEL */
+
/* Sanity check the access property list class */
if(TRUE != H5P_isa_class(*acspl_id, *libclass->class_id))
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not the required access property list")
+
+ *dxpl_id = H5AC_ind_read_dxpl_id;
+
+#ifdef H5_HAVE_PARALLEL
+ /* Get the plist structure for the access property list */
+ if(NULL == (plist = (H5P_genplist_t *)H5I_object(*acspl_id)))
+ HGOTO_ERROR(H5E_PLIST, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ /* Get the collective metadata read flag */
+ if(H5P_peek(plist, H5_COLL_MD_READ_FLAG_NAME, &is_collective) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get core collective metadata read flag")
+
+ /* If collective metadata read requested and using internal DXPL, switch to internal collective DXPL */
+ if(H5P_USER_TRUE == is_collective)
+ *dxpl_id = H5AC_coll_read_dxpl_id;
+#endif /* H5_HAVE_PARALLEL */
} /* end else */
#ifdef H5_HAVE_PARALLEL
@@ -5479,6 +5500,8 @@ H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass,
happens only when the environment variable H5_COLL_BARRIER is set
to non 0. */
if(is_collective && H5_coll_api_sanity_check_g) {
+ MPI_Comm mpi_comm;
+
/* retrieve the MPI communicator from the loc_id or the fapl_id */
if(H5F_mpi_retrieve_comm(loc_id, *acspl_id, &mpi_comm) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get MPI communicator")
@@ -5492,3 +5515,4 @@ H5P_verify_apl_and_dxpl(hid_t *acspl_id, const H5P_libclass_t *libclass,
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5P_verify_apl_and_dxpl() */
+
diff --git a/src/H5Plapl.c b/src/H5Plapl.c
index b7c682c..0d5507a 100644
--- a/src/H5Plapl.c
+++ b/src/H5Plapl.c
@@ -88,6 +88,11 @@
#define H5L_ACS_ELINK_CB_SIZE sizeof(H5L_elink_cb_t)
#define H5L_ACS_ELINK_CB_DEF {NULL,NULL}
+/* Definition for reading metadata collectively */
+#define H5L_ACS_COLL_MD_READ_SIZE sizeof(H5P_coll_md_read_flag_t)
+#define H5L_ACS_COLL_MD_READ_DEF H5P_USER_FALSE
+#define H5L_ACS_COLL_MD_READ_ENC H5P__encode_coll_md_read_flag_t
+#define H5L_ACS_COLL_MD_READ_DEC H5P__decode_coll_md_read_flag_t
/******************/
/* Local Typedefs */
@@ -164,7 +169,7 @@ static const char *H5L_def_elink_prefix_g = H5L_ACS_ELINK_PREFIX_DEF; /* Default
static const hid_t H5L_def_fapl_id_g = H5L_ACS_ELINK_FAPL_DEF; /* Default fapl for external link access */
static const unsigned H5L_def_elink_flags_g = H5L_ACS_ELINK_FLAGS_DEF; /* Default file access flags for external link traversal */
static const H5L_elink_cb_t H5L_def_elink_cb_g = H5L_ACS_ELINK_CB_DEF; /* Default external link traversal callback */
-
+static const H5P_coll_md_read_flag_t H5L_def_coll_md_read_g = H5L_ACS_COLL_MD_READ_DEF; /* Default setting for the collective metedata read flag */
/*-------------------------------------------------------------------------
@@ -216,6 +221,13 @@ H5P__lacc_reg_prop(H5P_genclass_t *pclass)
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+ /* Register the metadata collective read flag */
+ if(H5P_register_real(pclass, H5_COLL_MD_READ_FLAG_NAME, H5L_ACS_COLL_MD_READ_SIZE,
+ &H5L_def_coll_md_read_g,
+ NULL, NULL, NULL, H5L_ACS_COLL_MD_READ_ENC, H5L_ACS_COLL_MD_READ_DEC,
+ NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5P__lacc_reg_prop() */
diff --git a/src/H5Ppkg.h b/src/H5Ppkg.h
index 5997845..3662cf9 100644
--- a/src/H5Ppkg.h
+++ b/src/H5Ppkg.h
@@ -192,6 +192,8 @@ H5_DLL herr_t H5P__decode_unsigned(const void **_pp, void *value);
H5_DLL herr_t H5P__decode_uint8_t(const void **_pp, void *value);
H5_DLL herr_t H5P__decode_hbool_t(const void **_pp, void *value);
H5_DLL herr_t H5P__decode_double(const void **_pp, void *value);
+H5_DLL herr_t H5P__encode_coll_md_read_flag_t(const void *value, void **_pp, size_t *size);
+H5_DLL herr_t H5P__decode_coll_md_read_flag_t(const void **_pp, void *value);
/* Private OCPL routines */
H5_DLL herr_t H5P_get_filter(const struct H5Z_filter_info_t *filter,
diff --git a/src/H5Pprivate.h b/src/H5Pprivate.h
index d93e199..9074b78 100644
--- a/src/H5Pprivate.h
+++ b/src/H5Pprivate.h
@@ -46,6 +46,14 @@
/* Library Private Typedefs */
/****************************/
+#define H5_COLL_MD_READ_FLAG_NAME "collective_metadata_read"
+
+typedef enum H5P_coll_md_read_flag_t {
+ H5P_FORCE_FALSE = -1,
+ H5P_USER_FALSE = 0,
+ H5P_USER_TRUE = 1
+} H5P_coll_md_read_flag_t;
+
/* Forward declarations (for prototypes & type definitions) */
struct H5O_fill_t;
struct H5T_t;
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 6a97918..a0a684f 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -351,6 +351,12 @@ H5_DLL herr_t H5Pget_file_image_callbacks(hid_t fapl_id,
H5FD_file_image_callbacks_t *callbacks_ptr);
H5_DLL herr_t H5Pset_core_write_tracking(hid_t fapl_id, hbool_t is_enabled, size_t page_size);
H5_DLL herr_t H5Pget_core_write_tracking(hid_t fapl_id, hbool_t *is_enabled, size_t *page_size);
+#ifdef H5_HAVE_PARALLEL
+H5_DLL herr_t H5Pset_coll_metadata_read(hid_t plist_id, hbool_t is_collective);
+H5_DLL herr_t H5Pget_coll_metadata_read(hid_t plist_id, hbool_t *is_collective);
+H5_DLL herr_t H5Pset_coll_metadata_write(hid_t plist_id, hbool_t is_collective);
+H5_DLL herr_t H5Pget_coll_metadata_write(hid_t plist_id, hbool_t *is_collective);
+#endif /* H5_HAVE_PARALLEL */
/* Dataset creation property list (DCPL) routines */
H5_DLL herr_t H5Pset_layout(hid_t plist_id, H5D_layout_t layout);