diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-03-25 21:31:06 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-25 21:31:06 (GMT) |
commit | 15971fbd160ab061725f238379cc638ed37f05ef (patch) | |
tree | 1be26bd1be27c67ba674ddc39611d68959c3c207 /src/H5ACmpio.c | |
parent | f73b4c618cb680f8be9b2f2510c34442ec24d713 (diff) | |
download | hdf5-15971fbd160ab061725f238379cc638ed37f05ef.zip hdf5-15971fbd160ab061725f238379cc638ed37f05ef.tar.gz hdf5-15971fbd160ab061725f238379cc638ed37f05ef.tar.bz2 |
HDF5 1.12 merges (#1528)
* Use internal version of H5Eprint2 to avoid possible stack overflow (#661)
* Add support for parallel filters to h5repack (#832)
* Allow parallel filters feature for comm size of 1 (#840)
* Avoid popping API context when one wasn't pushed (#848)
* Fix several warnings (#720)
* Don't allow H5Pset(get)_all_coll_metadata_ops for DXPLs (#1201)
* Fix free list tracking and clean up cast alignment warnings (#1288)
* Fix free list tracking and clean up cast alignment warnings
* Add free list tracking code to H5FL 'arr' routines
* Fix usage of several HDfprintf format specifiers after HDfprintf removal (#1324)
* Use appropriate printf format specifiers for haddr_t and hsize_t types directly (#1340)
* Fix H5ACmpio dirty bytes creation debugging (#1357)
* Fix documentation for H5D_space_status_t enum values (#1372)
* Parallel rank0 deadlock fixes (#1183)
* Fix several places where rank 0 can skip past collective MPI operations on failure
* Committing clang-format changes
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Fix a few issues noted by LGTM (#1421)
* Fix cache sanity checking code by moving functions to wider scope (#1435)
* Fix metadata cache bug when resizing a pinned/protected entry (v2) (#1463)
* Disable memory alloc sanity checks by default for Autotools debug builds (#1468)
* Committing clang-format changes
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src/H5ACmpio.c')
-rw-r--r-- | src/H5ACmpio.c | 107 |
1 file changed, 66 insertions, 41 deletions
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c index ef85c6d..dcc1ab2 100644 --- a/src/H5ACmpio.c +++ b/src/H5ACmpio.c @@ -305,8 +305,10 @@ H5AC__broadcast_candidate_list(H5AC_t *cache_ptr, unsigned *num_entries_ptr, had * are used to receiving from process 0, and also load it * into a buffer for transmission. */ - if (H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + if (H5AC__copy_candidate_list_to_buffer(cache_ptr, &chk_num_entries, &haddr_buf_ptr) < 0) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate buffer.") + } HDassert(chk_num_entries == num_entries); HDassert(haddr_buf_ptr != NULL); @@ -429,18 +431,23 @@ H5AC__broadcast_clean_list(H5AC_t *cache_ptr) /* allocate a buffer to store the list of entry base addresses in */ buf_size = sizeof(haddr_t) * num_entries; - if (NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") - - /* Set up user data for callback */ - udata.aux_ptr = aux_ptr; - udata.addr_buf_ptr = addr_buf_ptr; - udata.u = 0; - - /* Free all the clean list entries, building the address list in the callback */ - /* (Callback also removes the matching entries from the dirtied list) */ - if (H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") + if (NULL == (addr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for addr buffer") + } + else { + /* Set up user data for callback */ + udata.aux_ptr = aux_ptr; + udata.addr_buf_ptr = addr_buf_ptr; + udata.u = 0; + + /* Free all the clean list entries, 
building the address list in the callback */ + /* (Callback also removes the matching entries from the dirtied list) */ + if (H5SL_free(aux_ptr->c_slist_ptr, H5AC__broadcast_clean_list_cb, &udata) < 0) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "Can't build address list for clean entries") + } + } /* Now broadcast the list of cleaned entries */ if (MPI_SUCCESS != @@ -1449,8 +1456,10 @@ H5AC__receive_haddr_list(MPI_Comm mpi_comm, unsigned *num_entries_ptr, haddr_t * /* allocate buffers to store the list of entry base addresses in */ buf_size = sizeof(haddr_t) * num_entries; - if (NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + if (NULL == (haddr_buf_ptr = (haddr_t *)H5MM_malloc(buf_size))) { + /* Push an error, but still participate in following MPI_Bcast */ + HDONE_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for haddr buffer") + } /* Now receive the list of candidate entries */ if (MPI_SUCCESS != @@ -1801,10 +1810,14 @@ H5AC__rsp__dist_md_write__flush_to_min_clean(H5F_t *f) if (evictions_enabled) { /* construct candidate list -- process 0 only */ - if (aux_ptr->mpi_rank == 0) + if (aux_ptr->mpi_rank == 0) { + /* If constructing candidate list fails, push an error but still participate + * in collective operations during following candidate list propagation + */ if (H5AC__construct_candidate_list(cache_ptr, aux_ptr, H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't construct candidate list.") + } /* propagate and apply candidate list -- all processes */ if (H5AC__propagate_and_apply_candidate_list(f) < 0) @@ -1900,15 +1913,21 @@ H5AC__rsp__p0_only__flush(H5F_t *f) aux_ptr->write_permitted = FALSE; /* Check for error on the write operation */ - if 
(result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") - - /* this code exists primarily for the test bed -- it allows us to - * enforce POSIX semantics on the server that pretends to be a - * file system in our parallel tests. - */ - if (aux_ptr->write_done) - (aux_ptr->write_done)(); + if (result < 0) { + /* If write operation fails, push an error but still participate + * in collective operations during following cache entry + * propagation + */ + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush.") + } + else { + /* this code exists primarily for the test bed -- it allows us to + * enforce POSIX semantics on the server that pretends to be a + * file system in our parallel tests. + */ + if (aux_ptr->write_done) + (aux_ptr->write_done)(); + } } /* end if */ /* Propagate cleaned entries to other ranks. */ @@ -2020,15 +2039,21 @@ H5AC__rsp__p0_only__flush_to_min_clean(H5F_t *f) aux_ptr->write_permitted = FALSE; /* Check for error on the write operation */ - if (result < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") - - /* this call exists primarily for the test code -- it is used - * to enforce POSIX semantics on the process used to simulate - * reads and writes in t_cache.c. - */ - if (aux_ptr->write_done) - (aux_ptr->write_done)(); + if (result < 0) { + /* If write operation fails, push an error but still participate + * in collective operations during following cache entry + * propagation + */ + HDONE_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5C_flush_to_min_clean() failed.") + } + else { + /* this call exists primarily for the test code -- it is used + * to enforce POSIX semantics on the process used to simulate + * reads and writes in t_cache.c. 
+ */ + if (aux_ptr->write_done) + (aux_ptr->write_done)(); + } } /* end if */ if (H5AC__propagate_flushed_and_still_clean_entries_list(f) < 0) @@ -2094,11 +2119,11 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) (sync_point_op == H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED)); #if H5AC_DEBUG_DIRTY_BYTES_CREATION - HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/r/ru) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank, + HDfprintf(stdout, "%d:H5AC_propagate...:%u: (u/uu/i/iu/m/mu) = %zu/%u/%zu/%u/%zu/%u\n", aux_ptr->mpi_rank, aux_ptr->dirty_bytes_propagations, aux_ptr->unprotect_dirty_bytes, aux_ptr->unprotect_dirty_bytes_updates, aux_ptr->insert_dirty_bytes, - aux_ptr->insert_dirty_bytes_updates, aux_ptr->rename_dirty_bytes, - aux_ptr->rename_dirty_bytes_updates); + aux_ptr->insert_dirty_bytes_updates, aux_ptr->move_dirty_bytes, + aux_ptr->move_dirty_bytes_updates); #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ /* clear collective access flag on half of the entries in the @@ -2162,8 +2187,8 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) aux_ptr->unprotect_dirty_bytes_updates = 0; aux_ptr->insert_dirty_bytes = 0; aux_ptr->insert_dirty_bytes_updates = 0; - aux_ptr->rename_dirty_bytes = 0; - aux_ptr->rename_dirty_bytes_updates = 0; + aux_ptr->move_dirty_bytes = 0; + aux_ptr->move_dirty_bytes_updates = 0; #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ done: |