diff options
Diffstat (limited to 'src/H5ACmpio.c')
-rw-r--r-- | src/H5ACmpio.c | 107 |
1 files changed, 66 insertions, 41 deletions
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c index ef85c6d..dcc1ab2 100644 --- a/src/H5ACmpio.c +++ b/src/H5ACmpio.c @@ -305,8 +305,10 @@ H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, unsigned *num_entries_ptr, had * are used to receiving from process 0, and also load it * into a buffer for transmission. */ - if (H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + if (H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + } HDassert(chk_num_entries == num_entries); HDassert(haddr_buf_ptr != NULL); @@ -429,18 +431,23 @@ H5AC__broadcast_clean_list(H5AC_t *cache_ptr) /* allocate a buffer to store the list of entry base addresses in */ buf_size = sizeof(haddr_t) * num_entries; - if (NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") - - /* Set up user data for callback */ - udata.aux_ptr = aux_ptr; - udata.addr_buf_ptr = addr_buf_ptr; - udata.u = 0; - - /* Free all the clean list entries, building the address list in the callback */ - /* (Callback also removes the matching entries from the dirtied list) */ - if (H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") + if (NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") + } + else { + /* Set up user data for callback */ + udata.aux_ptr = aux_ptr; + udata.addr_buf_ptr = addr_buf_ptr; + udata.u = 0; + + /* Free all the clean list entries, building the address list in the callback */ + /* (Callback also removes the matching entries from the dirtied list) */ + if (H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") + } + } /* Now broadcast the list of cleaned entries */ if (MPI_SUCCESS != @@ -1449,8 +1456,10 @@ H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr, haddr_t * /* allocate buffers to store the list of entry base addresses in */ buf_size = sizeof(haddr_t) * num_entries; - if (NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + if (NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + } /* Now receive the list of candidate entries */ if (MPI_SUCCESS != @@ -1801,10 +1810,14 @@ H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f) if (evictions_enabled) { /* construct candidate list -- process 0 only */ - if (aux_ptr->mpi_rank == 0) + if (aux_ptr->mpi_rank == 0) { + /* If constructing candidate list fails, push an error but still participate + * in collective operations during following candidate list propagation + */ if (H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + } /* propagate and apply candidate list -- all processes */ if (H5AC__propagate_and_apply_candidate_list(f) < 0) @@ -1900,15 +1913,21 @@ H5AC__rsp__p0_only__flush(H5F_t *f) aux_ptr->write_permitted = FALSE; /* Check for error on the write operation */ - if (result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") - - /* this code exists primarily for the test bed -- it allows us to - * enforce POSIX semantics on the server that pretends to be a - * file system in our parallel tests. - */ - if (aux_ptr->write_done) - (aux_ptr->write_done)(); + if (result < 0) { + /* If write operation fails, push an error but still participate + * in collective operations during following cache entry + * propagation + */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + } + else { + /* this code exists primarily for the test bed -- it allows us to + * enforce POSIX semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if (aux_ptr->write_done) + (aux_ptr->write_done)(); + } } /* end if */ /* Propagate cleaned entries to other ranks. */ @@ -2020,15 +2039,21 @@ H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f) aux_ptr->write_permitted = FALSE; /* Check for error on the write operation */ - if (result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") - - /* this call exists primarily for the test code -- it is used - * to enforce POSIX semantics on the process used to simulate - * reads and writes in t_cache.c. - */ - if (aux_ptr->write_done) - (aux_ptr->write_done)(); + if (result < 0) { + /* If write operation fails, push an error but still participate + * in collective operations during following cache entry + * propagation + */ + HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") + } + else { + /* this call exists primarily for the test code -- it is used + * to enforce POSIX semantics on the process used to simulate + * reads and writes in t_cache.c. + */ + if (aux_ptr->write_done) + (aux_ptr->write_done)(); + } } /* end if */ if (H5AC__propagate_flushed_and_still_clean_entries_list(f) < 0) @@ -2094,11 +2119,11 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED)); #if H5AC_DEBUG_DIRTY_BYTES_CREATION - HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank, + HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/m/mu) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank, aux_ptr->dirty_bytes_propagations, aux_ptr->unprotect_dirty_bytes, aux_ptr->unprotect_dirty_bytes_updates, aux_ptr->insert_dirty_bytes, - aux_ptr->insert_dirty_bytes_updates, aux_ptr->rename_dirty_bytes, - aux_ptr->rename_dirty_bytes_updates); + aux_ptr->insert_dirty_bytes_updates, aux_ptr->move_dirty_bytes, + aux_ptr->move_dirty_bytes_updates); #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ /* clear collective access flag on half of the entries in the @@ -2162,8 +2187,8 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) aux_ptr->unprotect_dirty_bytes_updates = 0; aux_ptr->insert_dirty_bytes = 0; aux_ptr->insert_dirty_bytes_updates = 0; - aux_ptr->rename_dirty_bytes = 0; - aux_ptr->rename_dirty_bytes_updates = 0; + aux_ptr->move_dirty_bytes = 0; + aux_ptr->move_dirty_bytes_updates = 0; #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ done: |