diff options
author | Dana Robinson <derobins@hdfgroup.org> | 2021-05-07 19:46:03 (GMT) |
---|---|---|
committer | Dana Robinson <derobins@hdfgroup.org> | 2021-05-07 19:46:03 (GMT) |
commit | 2f92cb5d19c0040326061fd2010bfd56c908879c (patch) | |
tree | 6a5df93bd13345e96e89d25ef0f9c49fcf956be9 /src | |
parent | 304df18dab6e35e240399f5da538c2f497fbaa59 (diff) | |
download | hdf5-2f92cb5d19c0040326061fd2010bfd56c908879c.zip hdf5-2f92cb5d19c0040326061fd2010bfd56c908879c.tar.gz hdf5-2f92cb5d19c0040326061fd2010bfd56c908879c.tar.bz2 |
Normalization of parallel
Diffstat (limited to 'src')
-rw-r--r-- | src/H5ACmpio.c | 17 | ||||
-rw-r--r-- | src/H5Dmpio.c | 32 |
2 files changed, 33 insertions, 16 deletions
diff --git a/src/H5ACmpio.c b/src/H5ACmpio.c index b0b54eb..afc15d1 100644 --- a/src/H5ACmpio.c +++ b/src/H5ACmpio.c @@ -1271,7 +1271,7 @@ H5AC__propagate_and_apply_candidate_list(H5F_t *f) if (aux_ptr->write_done) (aux_ptr->write_done)(); - /* to prevent "messages from the past" we must synchronize all + /* To prevent "messages from the past" we must synchronize all * processes again before we go on. */ if (MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) @@ -1514,7 +1514,7 @@ H5AC__receive_and_apply_clean_list(H5F_t *f) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't receive clean list") if (num_entries > 0) - /* mark the indicated entries as clean */ + /* Mark the indicated entries as clean */ if (H5C_mark_entries_as_clean(f, num_entries, haddr_buf_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't mark entries clean.") @@ -1848,6 +1848,8 @@ done: * Programmer: John Mainzer * April 28, 2010 * + * Changes: None. + * *------------------------------------------------------------------------- */ static herr_t @@ -1876,9 +1878,12 @@ H5AC__rsp__p0_only__flush(H5F_t *f) * However, when flushing from within the close operation from a file, * it's possible to skip this barrier (on the second flush of the cache). */ - if (!H5CX_get_mpi_file_flushing()) + if (!H5CX_get_mpi_file_flushing()) { + if (MPI_SUCCESS != (mpi_result = MPI_Barrier(aux_ptr->mpi_comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_result) + } /* Flush data to disk, from rank 0 process */ if (aux_ptr->mpi_rank == 0) { @@ -2075,9 +2080,13 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) /* Sanity checks */ HDassert(f != NULL); + cache_ptr = f->shared->cache; + HDassert(cache_ptr != NULL); + aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr); + HDassert(aux_ptr != NULL); HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); HDassert((sync_point_op == H5AC_SYNC_POINT_OP__FLUSH_TO_MIN_CLEAN) || @@ -2157,6 +2166,7 @@ H5AC__run_sync_point(H5F_t *f, int sync_point_op) #endif /* H5AC_DEBUG_DIRTY_BYTES_CREATION */ done: + FUNC_LEAVE_NOAPI(ret_value) } /* H5AC__run_sync_point() */ @@ -2252,7 +2262,6 @@ H5AC__tidy_cache_0_lists(H5AC_t *cache_ptr, unsigned num_candidates, haddr_t *ca * request to flush all items and something was protected. * * Programmer: Quincey Koziol - * koziol@hdfgroup.org * Aug 22 2009 * *------------------------------------------------------------------------- diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 16a8221..448e92d 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -413,7 +413,7 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, co * collective I/O */ if (MPI_SUCCESS != - (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm))) + (mpi_code = MPI_Allreduce(local_cause, global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) } /* end else */ @@ -1094,7 +1094,7 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ #ifdef H5D_DEBUG if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "total_chunks = %Zu, num_chunk = %Zu\n", total_chunks, num_chunk); + HDfprintf(H5DEBUG(D), "total_chunks = %zu, num_chunk = %zu\n", total_chunks, num_chunk); #endif /* Set up MPI datatype for chunks selected */ @@ -1574,7 +1574,7 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty chunk_addr = (haddr_t *)H5MM_calloc(total_chunk * sizeof(haddr_t)); #ifdef H5D_DEBUG if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "total_chunk %Zu\n", total_chunk); + HDfprintf(H5DEBUG(D), "total_chunk %zu\n", total_chunk); #endif /* Obtain IO option for each chunk */ @@ -1608,7 +1608,7 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty #ifdef H5D_DEBUG if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "mpi_rank = %d, chunk index = %Zu\n", mpi_rank, u); + HDfprintf(H5DEBUG(D), "mpi_rank = %d, chunk index = %zu\n", mpi_rank, u); #endif /* Get the chunk info for this chunk, if there are elements selected */ chunk_info = fm->select_chunk[u]; @@ -1628,7 +1628,7 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty if (chunk_io_option[u] == H5D_CHUNK_IO_MODE_COL) { #ifdef H5D_DEBUG if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "inside collective chunk IO mpi_rank = %d, chunk index = %Zu\n", + HDfprintf(H5DEBUG(D), "inside collective chunk IO mpi_rank = %d, chunk index = %zu\n", mpi_rank, u); #endif @@ -1667,7 +1667,7 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty else { /* possible independent IO for this chunk */ #ifdef H5D_DEBUG if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "inside independent IO mpi_rank = %d, chunk index = %Zu\n", mpi_rank, + HDfprintf(H5DEBUG(D), "inside independent IO mpi_rank = %d, chunk index = %zu\n", mpi_rank, u); #endif @@ -2838,7 +2838,7 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty "unable to allocate number of assigned chunks array") for (i = 0; i < shared_chunks_info_array_num_entries;) { - H5D_filtered_collective_io_info_t chunk_entry; + H5D_filtered_collective_io_info_t *chunk_entry; haddr_t last_seen_addr = shared_chunks_info_array[i].chunk_states.chunk_current.offset; size_t set_begin_index = i; size_t num_writers = 0; @@ -2846,17 +2846,17 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty /* Process each set of duplicate entries caused by another process writing to the same chunk */ do { - chunk_entry = shared_chunks_info_array[i]; + chunk_entry = &shared_chunks_info_array[i]; - send_counts[chunk_entry.owners.original_owner] += (int)sizeof(chunk_entry); + send_counts[chunk_entry->owners.original_owner] += (int)sizeof(*chunk_entry); /* The new owner of the chunk is determined by the process * writing to the chunk which currently has the least amount * of chunks assigned to it */ - if (num_assigned_chunks_array[chunk_entry.owners.original_owner] < + if (num_assigned_chunks_array[chunk_entry->owners.original_owner] < num_assigned_chunks_array[new_chunk_owner]) - new_chunk_owner = chunk_entry.owners.original_owner; + new_chunk_owner = chunk_entry->owners.original_owner; num_writers++; } while (++i < shared_chunks_info_array_num_entries && @@ -2907,6 +2907,8 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty sizeof(unsigned char *)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate modification data buffer array") + /* Perform all the sends on the chunks that this rank doesn't own */ + /* (Sends and recvs must be two separate loops, to avoid deadlock) */ for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) { H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i]; @@ -2965,7 +2967,13 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty num_send_requests++; } /* end if */ - else { + } /* end for */ + + /* Perform all the recvs on the chunks this rank owns */ + for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) { + H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i]; + + if (mpi_rank == chunk_entry->owners.new_owner) { /* Allocate all necessary buffers for an asynchronous receive operation */ if (chunk_entry->num_writers > 1) { MPI_Message message; |