diff options
Diffstat (limited to 'src/H5Dmpio.c')
-rw-r--r-- | src/H5Dmpio.c | 114 |
1 files changed, 61 insertions, 53 deletions
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index f5da33d..467b1b7 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -800,6 +800,10 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf HDassert(type_info); HDassert(fm); + /* Disable collective metadata reads for chunked dataset I/O operations + * in order to prevent potential hangs */ + H5CX_set_coll_metadata_read(FALSE); + /* Check the optional property list for the collective chunk IO optimization option */ if(H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option") @@ -1423,7 +1427,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in for (i = 0, offset = 0; i < (size_t) mpi_rank; i++) offset += num_chunks_selected_array[i]; - HDmemcpy(chunk_list, &collective_chunk_list[offset], num_chunks_selected_array[mpi_rank] * sizeof(H5D_filtered_collective_io_info_t)); + H5MM_memcpy(chunk_list, &collective_chunk_list[offset], num_chunks_selected_array[mpi_rank] * sizeof(H5D_filtered_collective_io_info_t)); /* Create single MPI type encompassing each selection in the dataspace */ if (H5D__mpio_filtered_collective_write_type(chunk_list, chunk_list_num_entries, @@ -1549,7 +1553,7 @@ if(H5DEBUG(D)) HGOTO_ERROR(H5E_DATASET, H5E_CANTRECV, FAIL, "unable to obtain MPIO mode") /* Set up contiguous I/O info object */ - HDmemcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); + H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); ctg_io_info.store = &ctg_store; ctg_io_info.layout_ops = *H5D_LOPS_CONTIG; @@ -1557,7 +1561,7 @@ if(H5DEBUG(D)) ctg_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size; /* Set up compact I/O info object */ - HDmemcpy(&cpt_io_info, io_info, sizeof(cpt_io_info)); + H5MM_memcpy(&cpt_io_info, io_info, sizeof(cpt_io_info)); cpt_io_info.store = &cpt_store; cpt_io_info.layout_ops = *H5D_LOPS_COMPACT; @@ -1784,7 +1788,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list") /* Set up contiguous I/O info object */ - HDmemcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); + H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); ctg_io_info.store = &ctg_store; ctg_io_info.layout_ops = *H5D_LOPS_CONTIG; @@ -1889,7 +1893,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i /* Collect the new chunk info back to the local copy, since only the record in the * collective array gets updated by the chunk re-allocation */ - HDmemcpy(&chunk_list[i].chunk_states.new_chunk, &collective_chunk_list[offset].chunk_states.new_chunk, sizeof(chunk_list[i].chunk_states.new_chunk)); + H5MM_memcpy(&chunk_list[i].chunk_states.new_chunk, &collective_chunk_list[offset].chunk_states.new_chunk, sizeof(chunk_list[i].chunk_states.new_chunk)); H5_CHECKED_ASSIGN(mpi_type_count, int, chunk_list[i].chunk_states.new_chunk.length, hsize_t); @@ -2313,7 +2317,7 @@ if(H5DEBUG(D)) /* Broadcasting the MPI_IO option info. and chunk address info. */ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(total_chunk_addr_array, (int)(sizeof(haddr_t) * fm->layout->u.chunk.nchunks), MPI_BYTE, (int)0, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code) } /* end if */ /* Start at first node in chunk skip list */ @@ -2513,8 +2517,8 @@ H5D__obtain_mpio_mode(H5D_io_info_t* io_info, H5D_chunk_map_t *fm, /* merge buffer io_mode info and chunk addr into one */ - HDmemcpy(mergebuf, assign_io_mode, total_chunks); - HDmemcpy(tempbuf, chunk_addr, sizeof(haddr_t) * total_chunks); + H5MM_memcpy(mergebuf, assign_io_mode, total_chunks); + H5MM_memcpy(tempbuf, chunk_addr, sizeof(haddr_t) * total_chunks); H5MM_free(nproc_per_chunk); } /* end if */ @@ -2523,8 +2527,8 @@ H5D__obtain_mpio_mode(H5D_io_info_t* io_info, H5D_chunk_map_t *fm, if(MPI_SUCCESS != (mpi_code = MPI_Bcast(mergebuf, ((sizeof(haddr_t) + 1) * total_chunks), MPI_BYTE, root, comm))) HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code) - HDmemcpy(assign_io_mode, mergebuf, total_chunks); - HDmemcpy(chunk_addr, tempbuf, sizeof(haddr_t) * total_chunks); + H5MM_memcpy(assign_io_mode, mergebuf, total_chunks); + H5MM_memcpy(chunk_addr, tempbuf, sizeof(haddr_t) * total_chunks); #ifdef H5_HAVE_INSTRUMENTED_LIBRARY { @@ -2626,7 +2630,7 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ local_info_array[i].async_info.receive_buffer_array = NULL; local_info_array[i].async_info.receive_requests_array = NULL; - HDmemcpy(local_info_array[i].scaled, chunk_info->scaled, sizeof(chunk_info->scaled)); + H5MM_memcpy(local_info_array[i].scaled, chunk_info->scaled, sizeof(chunk_info->scaled)); if ((select_npoints = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") @@ -2738,6 +2742,9 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") + /* Set to latest format for encoding dataspace */ + H5CX_set_libver_bounds(NULL); + if (*local_chunk_array_num_entries) if (NULL == (send_requests = (MPI_Request *) H5MM_malloc(*local_chunk_array_num_entries * sizeof(MPI_Request)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send requests buffer") @@ -2799,8 +2806,9 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty /* Sort the new list in order of previous owner so that each original owner of a chunk * entry gets that entry back, with the possibly newly-modified "new_owner" field */ - HDqsort(shared_chunks_info_array, shared_chunks_info_array_num_entries, - sizeof(H5D_filtered_collective_io_info_t), H5D__cmp_filtered_collective_io_info_entry_owner); + if(shared_chunks_info_array_num_entries > 1) + HDqsort(shared_chunks_info_array, shared_chunks_info_array_num_entries, + sizeof(H5D_filtered_collective_io_info_t), H5D__cmp_filtered_collective_io_info_entry_owner); send_displacements[0] = 0; for (i = 1; i < (size_t) mpi_size; i++) @@ -2837,45 +2845,45 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty size_t mod_data_size; /* Look up the chunk and get its file and memory dataspaces */ - if (NULL == (chunk_info = (H5D_chunk_info_t *) H5SL_search(fm->sel_chunks, &chunk_entry->index))) + if(NULL == (chunk_info = (H5D_chunk_info_t *) H5SL_search(fm->sel_chunks, &chunk_entry->index))) HGOTO_ERROR(H5E_DATASPACE, H5E_NOTFOUND, FAIL, "can't locate chunk in skip list") /* Determine size of serialized chunk file dataspace, plus the size of * the data being written */ - if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) + if(H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to get encoded dataspace size") - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") mod_data_size += (size_t) iter_nelmts * type_info->src_type_size; - if (NULL == (mod_data[num_send_requests] = (unsigned char *) H5MM_malloc(mod_data_size))) + if(NULL == (mod_data[num_send_requests] = (unsigned char *) H5MM_malloc(mod_data_size))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk modification send buffer") /* Serialize the chunk's file dataspace into the buffer */ mod_data_p = mod_data[num_send_requests]; - if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) + if(H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace") /* Initialize iterator for memory selection */ - if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size) < 0) + if(H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") mem_iter_init = TRUE; /* Collect the modification data into the buffer */ - if(!H5D__gather_mem(io_info->u.wbuf, chunk_info->mspace, mem_iter, (size_t)iter_nelmts, mod_data_p)) + if(0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, mod_data_p)) HGOTO_ERROR(H5E_IO, H5E_CANTGATHER, FAIL, "couldn't gather from write buffer") /* Send modification data to new owner */ H5_CHECK_OVERFLOW(mod_data_size, size_t, int) H5_CHECK_OVERFLOW(chunk_entry->index, hsize_t, int) - if (MPI_SUCCESS != (mpi_code = MPI_Isend(mod_data[num_send_requests], (int) mod_data_size, MPI_BYTE, + if(MPI_SUCCESS != (mpi_code = MPI_Isend(mod_data[num_send_requests], (int) mod_data_size, MPI_BYTE, chunk_entry->owners.new_owner, (int) chunk_entry->index, io_info->comm, &send_requests[num_send_requests]))) HMPI_GOTO_ERROR(FAIL, "MPI_Isend failed", mpi_code) - if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if(mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release memory selection iterator") mem_iter_init = FALSE; @@ -3044,11 +3052,11 @@ H5D__mpio_filtered_collective_write_type(H5D_filtered_collective_io_info_t *chun } /* end if */ done: - if (write_buf_array) + if(write_buf_array) H5MM_free(write_buf_array); - if (file_offset_array) + if(file_offset_array) H5MM_free(file_offset_array); - if (length_array) + if(length_array) H5MM_free(length_array); FUNC_LEAVE_NOAPI(ret_value) @@ -3166,7 +3174,7 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk if (NULL == (mem_iter = (H5S_sel_iter_t *) H5MM_malloc(sizeof(H5S_sel_iter_t)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator") - if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size) < 0) + if(H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") mem_iter_init = TRUE; @@ -3176,99 +3184,99 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk * from the current process, then apply any modifications from other processes. Finally, * filter the newly-updated chunk. */ - switch (io_info->op_type) { + switch(io_info->op_type) { case H5D_IO_OP_READ: - if (NULL == (file_iter = (H5S_sel_iter_t *) H5MM_malloc(sizeof(H5S_sel_iter_t)))) + if(NULL == (file_iter = (H5S_sel_iter_t *) H5MM_malloc(sizeof(H5S_sel_iter_t)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file iterator") - if (H5S_select_iter_init(file_iter, chunk_info->fspace, type_info->src_type_size) < 0) + if(H5S_select_iter_init(file_iter, chunk_info->fspace, type_info->src_type_size, 0) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") file_iter_init = TRUE; - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") - if (NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size))) + if(NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer") - if (!H5D__gather_mem(chunk_entry->buf, chunk_info->fspace, file_iter, (size_t) iter_nelmts, tmp_gath_buf)) + if(!H5D__gather_mem(chunk_entry->buf, file_iter, (size_t) iter_nelmts, tmp_gath_buf)) HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't gather from chunk buffer") - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") - if (H5D__scatter_mem(tmp_gath_buf, chunk_info->mspace, mem_iter, (size_t) iter_nelmts, io_info->u.rbuf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer") + if(H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t) iter_nelmts, io_info->u.rbuf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer") break; case H5D_IO_OP_WRITE: - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") - if (NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size))) + if(NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer") /* Gather modification data from the application write buffer into a temporary buffer */ - if(!H5D__gather_mem(io_info->u.wbuf, chunk_info->mspace, mem_iter, (size_t) iter_nelmts, tmp_gath_buf)) + if(0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t) iter_nelmts, tmp_gath_buf)) HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "couldn't gather from write buffer") - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if(H5S_SELECT_ITER_RELEASE(mem_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") mem_iter_init = FALSE; /* Initialize iterator for file selection */ - if (H5S_select_iter_init(mem_iter, chunk_info->fspace, type_info->dst_type_size) < 0) + if(H5S_select_iter_init(mem_iter, chunk_info->fspace, type_info->dst_type_size, 0) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize file selection information") mem_iter_init = TRUE; - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") /* Scatter the owner's modification data into the chunk data buffer according to * the file space. */ - if(H5D__scatter_mem(tmp_gath_buf, chunk_info->fspace, mem_iter, (size_t) iter_nelmts, chunk_entry->buf) < 0) + if(H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t) iter_nelmts, chunk_entry->buf) < 0) HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to chunk data buffer") - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if(H5S_SELECT_ITER_RELEASE(mem_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") mem_iter_init = FALSE; - if (MPI_SUCCESS != (mpi_code = MPI_Waitall(chunk_entry->async_info.num_receive_requests, + if(MPI_SUCCESS != (mpi_code = MPI_Waitall(chunk_entry->async_info.num_receive_requests, chunk_entry->async_info.receive_requests_array, MPI_STATUSES_IGNORE))) HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code) /* For each asynchronous receive call previously posted, receive the chunk modification * buffer from another rank and update the chunk data */ - for (i = 0; i < (size_t) chunk_entry->async_info.num_receive_requests; i++) { + for(i = 0; i < (size_t) chunk_entry->async_info.num_receive_requests; i++) { const unsigned char *mod_data_p; /* Decode the process' chunk file dataspace */ mod_data_p = chunk_entry->async_info.receive_buffer_array[i]; - if (NULL == (dataspace = H5S_decode(&mod_data_p))) + if(NULL == (dataspace = H5S_decode(&mod_data_p))) HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace") - if (H5S_select_iter_init(mem_iter, dataspace, type_info->dst_type_size) < 0) + if(H5S_select_iter_init(mem_iter, dataspace, type_info->dst_type_size, 0) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") mem_iter_init = TRUE; - if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace)) < 0) + if((iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace)) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") /* Update the chunk data with the received modification data */ - if(H5D__scatter_mem(mod_data_p, dataspace, mem_iter, (size_t) iter_nelmts, chunk_entry->buf) < 0) + if(H5D__scatter_mem(mod_data_p, mem_iter, (size_t) iter_nelmts, chunk_entry->buf) < 0) HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't scatter to write buffer") - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if(H5S_SELECT_ITER_RELEASE(mem_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") mem_iter_init = FALSE; - if (dataspace) { - if (H5S_close(dataspace) < 0) + if(dataspace) { + if(H5S_close(dataspace) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace") dataspace = NULL; - } + } /* end if */ H5MM_free(chunk_entry->async_info.receive_buffer_array[i]); } /* end for */ |