summaryrefslogtreecommitdiffstats
path: root/src/H5Cmpio.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5Cmpio.c')
-rw-r--r--src/H5Cmpio.c260
1 files changed, 155 insertions, 105 deletions
diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c
index 199c494..a4c295b 100644
--- a/src/H5Cmpio.c
+++ b/src/H5Cmpio.c
@@ -18,7 +18,7 @@
* Quincey Koziol
*
* Purpose: Functions in this file implement support for parallel I/O for
- * generic cache code.
+ * generic cache code.
*
*-------------------------------------------------------------------------
*/
@@ -28,7 +28,7 @@
/****************/
#include "H5Cmodule.h" /* This source code file is part of the H5C module */
-#define H5F_FRIEND /*suppress error about including H5Fpkg */
+#define H5F_FRIEND /*suppress error about including H5Fpkg */
/***********/
@@ -114,9 +114,9 @@ static herr_t H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring,
*
* We construct the table as follows. Let:
*
- * n = num_candidates / mpi_size;
+ * n = num_candidates / mpi_size;
*
- * m = num_candidates % mpi_size;
+ * m = num_candidates % mpi_size;
*
* Now allocate an array of integers of length mpi_size + 1,
* and call this array candidate_assignment_table.
@@ -136,10 +136,10 @@ static herr_t H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring,
* Once the table is constructed, we determine the first and
* last entry this process is to flush as follows:
*
- * first_entry_to_flush = candidate_assignment_table[mpi_rank]
+ * first_entry_to_flush = candidate_assignment_table[mpi_rank]
*
* last_entry_to_flush =
- * candidate_assignment_table[mpi_rank + 1] - 1;
+ * candidate_assignment_table[mpi_rank + 1] - 1;
*
* With these values determined, we simply scan through the
* candidate list, marking all entries in the range
@@ -163,6 +163,10 @@ static herr_t H5C__flush_candidates_in_ring(H5F_t *f, H5C_ring_t ring,
* Programmer: John Mainzer
* 3/17/10
*
+ * Changes: Updated sanity checks to allow for the possibility that
+ * the slist is disabled.
+ * JRM -- 8/3/20
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -185,12 +189,15 @@ H5C_apply_candidate_list(H5F_t * f,
unsigned entries_to_clear[H5C_RING_NTYPES];
haddr_t addr;
H5C_cache_entry_t * entry_ptr = NULL;
+
#if H5C_DO_SANITY_CHECKS
haddr_t last_addr;
#endif /* H5C_DO_SANITY_CHECKS */
+
#if H5C_APPLY_CANDIDATE_LIST__DEBUG
char tbl_buf[1024];
#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */
+
unsigned u; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
@@ -200,7 +207,8 @@ H5C_apply_candidate_list(H5F_t * f,
HDassert(cache_ptr != NULL);
HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC);
HDassert(num_candidates > 0);
- HDassert(num_candidates <= cache_ptr->slist_len);
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( num_candidates <= cache_ptr->slist_len ));
HDassert(candidates_list_ptr != NULL);
HDassert(0 <= mpi_rank);
HDassert(mpi_rank < mpi_size);
@@ -410,6 +418,7 @@ done:
/*-------------------------------------------------------------------------
+ *
* Function: H5C_construct_candidate_list__clean_cache
*
* Purpose: Construct the list of entries that should be flushed to
@@ -425,6 +434,16 @@ done:
* Programmer: John Mainzer
* 3/17/10
*
+ * Changes: With the slist optimization, the slist is not maintained
+ * unless a flush is in progress. Thus we can not longer use
+ * cache_ptr->slist_size to determine the total size of
+ * the entries we must insert in the candidate list.
+ *
+ * To address this, we now use cache_ptr->dirty_index_size
+ * instead.
+ *
+ * JRM -- 7/27/20
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -438,60 +457,82 @@ H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr)
HDassert( cache_ptr != NULL );
HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC );
- /* As a sanity check, set space needed to the size of the skip list.
- * This should be the sum total of the sizes of all the dirty entries
- * in the metadata cache.
+ /* As a sanity check, set space needed to the dirty_index_size. This
+ * should be the sum total of the sizes of all the dirty entries
+ * in the metadata cache. Note that if the slist is enabled,
+ * cache_ptr->slist_size should equal cache_ptr->dirty_index_size.
*/
- space_needed = cache_ptr->slist_size;
+ space_needed = cache_ptr->dirty_index_size;
+
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( space_needed == cache_ptr->slist_size ) );
+
/* Recall that while we shouldn't have any protected entries at this
* point, it is possible that some dirty entries may reside on the
* pinned list at this point.
*/
- HDassert( cache_ptr->slist_size <=
+ HDassert( cache_ptr->dirty_index_size <=
(cache_ptr->dLRU_list_size + cache_ptr->pel_size) );
- HDassert( cache_ptr->slist_len <=
- (cache_ptr->dLRU_list_len + cache_ptr->pel_len) );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( cache_ptr->slist_len <=
+ (cache_ptr->dLRU_list_len + cache_ptr->pel_len) ) );
+
if(space_needed > 0) { /* we have work to do */
+
H5C_cache_entry_t *entry_ptr;
unsigned nominated_entries_count = 0;
size_t nominated_entries_size = 0;
- haddr_t nominated_addr;
+ haddr_t nominated_addr;
- HDassert( cache_ptr->slist_len > 0 );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( cache_ptr->slist_len > 0 ) );
/* Scan the dirty LRU list from tail forward and nominate sufficient
* entries to free up the necessary space.
*/
entry_ptr = cache_ptr->dLRU_tail_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL)) {
+
+ while ( ( nominated_entries_size < space_needed ) &&
+ ( ( ! cache_ptr->slist_enabled ) ||
+ ( nominated_entries_count < cache_ptr->slist_len ) ) &&
+ ( entry_ptr != NULL ) ) {
+
HDassert( ! (entry_ptr->is_protected) );
HDassert( ! (entry_ptr->is_read_only) );
HDassert( entry_ptr->ro_ref_count == 0 );
HDassert( entry_ptr->is_dirty );
- HDassert( entry_ptr->in_slist );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( entry_ptr->in_slist ) );
nominated_addr = entry_ptr->addr;
+
if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5AC_add_candidate() failed")
nominated_entries_size += entry_ptr->size;
nominated_entries_count++;
entry_ptr = entry_ptr->aux_prev;
+
} /* end while */
+
HDassert( entry_ptr == NULL );
/* it is possible that there are some dirty entries on the
* protected entry list as well -- scan it too if necessary
*/
entry_ptr = cache_ptr->pel_head_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL)) {
+
+ while ( ( nominated_entries_size < space_needed ) &&
+ ( ( ! cache_ptr->slist_enabled ) ||
+ ( nominated_entries_count < cache_ptr->slist_len ) ) &&
+ ( entry_ptr != NULL ) ) {
+
if(entry_ptr->is_dirty) {
+
HDassert( ! (entry_ptr->is_protected) );
HDassert( ! (entry_ptr->is_read_only) );
HDassert( entry_ptr->ro_ref_count == 0 );
@@ -499,22 +540,31 @@ H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr)
HDassert( entry_ptr->in_slist );
nominated_addr = entry_ptr->addr;
+
if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5AC_add_candidate() failed")
nominated_entries_size += entry_ptr->size;
nominated_entries_count++;
+
} /* end if */
entry_ptr = entry_ptr->next;
+
} /* end while */
- HDassert( nominated_entries_count == cache_ptr->slist_len );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( nominated_entries_count == cache_ptr->slist_len ) );
HDassert( nominated_entries_size == space_needed );
+
} /* end if */
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5C_construct_candidate_list__clean_cache() */
@@ -534,6 +584,12 @@ done:
* Programmer: John Mainzer
* 3/17/10
*
+ * Changes: With the slist optimization, the slist is not maintained
+ * unless a flush is in progress. Updated sanity checks to
+ * reflect this.
+ *
+ * JRM -- 7/27/20
+ *
*-------------------------------------------------------------------------
*/
herr_t
@@ -551,54 +607,77 @@ H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr)
* cache back within its min clean constraints.
*/
if(cache_ptr->max_cache_size > cache_ptr->index_size) {
- if(((cache_ptr->max_cache_size - cache_ptr->index_size) +
- cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size)
+
+ if ( ( (cache_ptr->max_cache_size - cache_ptr->index_size) +
+ cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size ) {
+
space_needed = 0;
- else
+
+ } else {
+
space_needed = cache_ptr->min_clean_size -
((cache_ptr->max_cache_size - cache_ptr->index_size) +
cache_ptr->cLRU_list_size);
+ }
} /* end if */
else {
- if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size)
+
+ if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) {
+
space_needed = 0;
- else
+
+ } else {
+
space_needed = cache_ptr->min_clean_size -
cache_ptr->cLRU_list_size;
+ }
} /* end else */
if(space_needed > 0) { /* we have work to do */
+
H5C_cache_entry_t *entry_ptr;
unsigned nominated_entries_count = 0;
size_t nominated_entries_size = 0;
- HDassert( cache_ptr->slist_len > 0 );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( cache_ptr->slist_len > 0 ) );
/* Scan the dirty LRU list from tail forward and nominate sufficient
* entries to free up the necessary space.
*/
entry_ptr = cache_ptr->dLRU_tail_ptr;
- while((nominated_entries_size < space_needed) &&
- (nominated_entries_count < cache_ptr->slist_len) &&
- (entry_ptr != NULL) &&
- (!entry_ptr->flush_me_last)) {
- haddr_t nominated_addr;
+
+ while ( ( nominated_entries_size < space_needed ) &&
+ ( ( ! cache_ptr->slist_enabled ) ||
+ ( nominated_entries_count < cache_ptr->slist_len ) ) &&
+ ( entry_ptr != NULL ) &&
+ ( ! entry_ptr->flush_me_last ) ) {
+
+ haddr_t nominated_addr;
HDassert( ! (entry_ptr->is_protected) );
HDassert( ! (entry_ptr->is_read_only) );
HDassert( entry_ptr->ro_ref_count == 0 );
HDassert( entry_ptr->is_dirty );
- HDassert( entry_ptr->in_slist );
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( entry_ptr->in_slist ) );
nominated_addr = entry_ptr->addr;
+
if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed")
+
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \
+ "H5AC_add_candidate() failed")
nominated_entries_size += entry_ptr->size;
nominated_entries_count++;
entry_ptr = entry_ptr->aux_prev;
+
} /* end while */
- HDassert( nominated_entries_count <= cache_ptr->slist_len );
+
+ HDassert( ( ! cache_ptr->slist_enabled ) ||
+ ( nominated_entries_count <= cache_ptr->slist_len ) );
+ HDassert( nominated_entries_size <= cache_ptr->dirty_index_size );
HDassert( nominated_entries_size >= space_needed );
} /* end if */
@@ -769,7 +848,7 @@ H5C_mark_entries_as_clean(H5F_t * f,
* of the pre_serialize / serialize routines, this may
* cease to be the case -- requiring a review of this
* point.
- * JRM -- 4/7/15
+ * JRM -- 4/7/15
*/
entries_cleared = 0;
entries_examined = 0;
@@ -873,9 +952,9 @@ done:
herr_t
H5C_clear_coll_entries(H5C_t *cache_ptr, hbool_t partial)
{
- uint32_t clear_cnt;
- H5C_cache_entry_t * entry_ptr = NULL;
- herr_t ret_value = SUCCEED;
+ uint32_t clear_cnt;
+ H5C_cache_entry_t * entry_ptr = NULL;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI_NOINIT
@@ -921,6 +1000,7 @@ H5C__collective_write(H5F_t *f)
{
H5AC_t *cache_ptr;
H5FD_mpio_xfer_t orig_xfer_mode = H5FD_MPIO_COLLECTIVE;
+ void *base_buf;
int count;
int *length_array = NULL;
MPI_Aint *buf_array = NULL;
@@ -930,6 +1010,8 @@ H5C__collective_write(H5F_t *f)
MPI_Datatype ftype;
hbool_t ftype_created = FALSE;
int mpi_code;
+ char unused = 0; /* Unused, except for non-NULL pointer value */
+ size_t buf_count;
herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
@@ -944,20 +1026,17 @@ H5C__collective_write(H5F_t *f)
if(H5CX_get_io_xfer_mode(&orig_xfer_mode) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
+ /* Set transfer mode */
+ if(H5CX_set_io_xfer_mode(H5FD_MPIO_COLLECTIVE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
+
/* Get number of entries in collective write list */
count = (int)H5SL_count(cache_ptr->coll_write_list);
-
if(count > 0) {
- H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE;
H5SL_node_t *node;
H5C_cache_entry_t *entry_ptr;
- void *base_buf;
int i;
- /* Set new transfer mode */
- if(H5CX_set_io_xfer_mode(xfer_mode) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
-
/* Allocate arrays */
if(NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int))) )
HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for collective write table length array")
@@ -981,7 +1060,6 @@ H5C__collective_write(H5F_t *f)
node = H5SL_next(node);
i = 1;
while(node) {
-
if(NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node)))
HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item")
@@ -998,67 +1076,39 @@ H5C__collective_write(H5F_t *f)
/* Create memory MPI type */
if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
-
btype_created = TRUE;
-
if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
/* Create file MPI type */
if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
-
ftype_created = TRUE;
-
if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
- /* Pass buf type, file type to the file driver */
- if(H5CX_set_mpi_coll_datatypes(btype, ftype) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties")
-
- /* Write data */
- if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, base_buf) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively")
-
+ /* MPI count to write */
+ buf_count = 1;
} /* end if */
else {
- MPI_Status mpi_stat;
- MPI_File *mpi_fh_p;
- MPI_File mpi_fh;
- MPI_Info *info_p;
- MPI_Info info;
-
-/* This should be rewritten to call H5F_block_write, with the correct
- * buffer and file datatypes (null ones). -QAK, 2018/02/21
- */
- if(H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get mpi file handle")
+ /* Pass trivial buf type, file type to the file driver */
+ btype = MPI_BYTE;
+ ftype = MPI_BYTE;
- mpi_fh = *(MPI_File*)mpi_fh_p;
+ /* Set non-NULL pointer for I/O operation */
+ base_buf = &unused;
- if(H5F_get_mpi_info(f, &info_p) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file info")
-
- info = *info_p;
-
- /* just to match up with the 1st MPI_File_set_view from
- * H5FD_mpio_write()
- */
- if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+ /* MPI count to write */
+ buf_count = 0;
+ } /* end else */
- /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */
- HDmemset(&mpi_stat, 0, sizeof(MPI_Status));
- if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0, MPI_BYTE, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
+ /* Pass buf type, file type to the file driver */
+ if(H5CX_set_mpi_coll_datatypes(btype, ftype) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties")
- /* just to match up with the 2nd MPI_File_set_view (reset) in
- * H5FD_mpio_write()
- */
- if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
- } /* end else */
+ /* Write data */
+ if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, buf_count, base_buf) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_WRITEERROR, FAIL, "unable to write entries collectively")
done:
/* Free arrays */
@@ -1124,17 +1174,17 @@ H5C__flush_candidate_entries(H5F_t *f, unsigned entries_to_flush[H5C_RING_NTYPES
unsigned entries_to_clear[H5C_RING_NTYPES])
{
#if H5C_DO_SANITY_CHECKS
- int i;
- uint32_t index_len = 0;
- size_t index_size = (size_t)0;
- size_t clean_index_size = (size_t)0;
- size_t dirty_index_size = (size_t)0;
- size_t slist_size = (size_t)0;
- uint32_t slist_len = 0;
+ int i;
+ uint32_t index_len = 0;
+ size_t index_size = (size_t)0;
+ size_t clean_index_size = (size_t)0;
+ size_t dirty_index_size = (size_t)0;
+ size_t slist_size = (size_t)0;
+ uint32_t slist_len = 0;
#endif /* H5C_DO_SANITY_CHECKS */
- H5C_ring_t ring;
+ H5C_ring_t ring;
H5C_t * cache_ptr;
- herr_t ret_value = SUCCEED;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC