diff options
Diffstat (limited to 'src/H5Dmpio.c')
-rw-r--r-- | src/H5Dmpio.c | 176 |
1 files changed, 122 insertions, 54 deletions
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 3b5bb30..c294e42 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -157,12 +157,9 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, const H5S_t *mem_space, const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm, H5P_genplist_t *dx_plist) { - /* variables to set cause of broken collective I/O */ - int local_cause = 0; - int global_cause = 0; - - int mpi_code; /* MPI error code */ - htri_t ret_value = TRUE; + int local_cause = 0; /* Local reason(s) for breaking collective mode */ + int global_cause = 0; /* Global reason(s) for breaking collective mode */ + htri_t ret_value; /* Return value */ FUNC_ENTER_PACKAGE @@ -174,51 +171,35 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, /* For independent I/O, get out quickly and don't try to form consensus */ - if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_INDEPENDENT) { - local_cause = H5D_MPIO_SET_INDEPENDENT; - global_cause = H5D_MPIO_SET_INDEPENDENT; - HGOTO_DONE(FALSE); - } - - /* Optimized MPI types flag must be set and it must be collective IO */ - /* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */ - if(!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE - && !IS_H5FD_MPIPOSIX(io_info->dset->oloc.file))) { - local_cause |= H5D_MPIO_SET_MPIPOSIX; - } /* end if */ + if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_INDEPENDENT) + local_cause |= H5D_MPIO_SET_INDEPENDENT; + + /* Optimized MPI types flag must be set */ + /* (based on 'HDF5_MPI_OPT_TYPES' environment variable) */ + if(!H5FD_mpi_opt_types_g) + local_cause |= H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED; /* Don't allow collective operations if datatype conversions need to happen */ - if(!type_info->is_conv_noop) { + if(!type_info->is_conv_noop) local_cause |= H5D_MPIO_DATATYPE_CONVERSION; - } /* end if */ /* Don't allow collective operations if data transform operations should occur */ - if(!type_info->is_xform_noop) { + if(!type_info->is_xform_noop) local_cause |= H5D_MPIO_DATA_TRANSFORMS; - } /* end if */ /* Check whether these are both simple or scalar dataspaces */ if(!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space)) - && (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space)))) { + && (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space)))) local_cause |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES; - } /* end if */ - - /* Can't currently handle point selections */ - if(H5S_SEL_POINTS == H5S_GET_SELECT_TYPE(mem_space) - || H5S_SEL_POINTS == H5S_GET_SELECT_TYPE(file_space)) { - local_cause |= H5D_MPIO_POINT_SELECTIONS; - } /* end if */ /* Dataset storage must be contiguous or chunked */ if(!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS || - io_info->dset->shared->layout.type == H5D_CHUNKED)) { + io_info->dset->shared->layout.type == H5D_CHUNKED)) local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET; - } /* end if */ /* check if external-file storage is used */ - if (io_info->dset->shared->dcpl_cache.efl.nused > 0) { + if(io_info->dset->shared->dcpl_cache.efl.nused > 0) local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET; - } /* The handling of memory space is different for chunking and contiguous * storage. For contiguous storage, mem_space and file_space won't change @@ -228,22 +209,23 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, */ /* Don't allow collective operations if filters need to be applied */ - if(io_info->dset->shared->layout.type == H5D_CHUNKED) { - if(io_info->dset->shared->dcpl_cache.pline.nused > 0) { - local_cause |= H5D_MPIO_FILTERS; - } /* end if */ - } /* end if */ + if(io_info->dset->shared->layout.type == H5D_CHUNKED && + io_info->dset->shared->dcpl_cache.pline.nused > 0) + local_cause |= H5D_MPIO_FILTERS; - /* Form consensus opinion among all processes about whether to perform - * collective I/O - */ - if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_INT, MPI_BOR, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - - ret_value = global_cause > 0 ? FALSE : TRUE; + /* Check for independent I/O */ + if(local_cause & H5D_MPIO_SET_INDEPENDENT) + global_cause = local_cause; + else { + int mpi_code; /* MPI error code */ + /* Form consensus opinion among all processes about whether to perform + * collective I/O + */ + if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_INT, MPI_BOR, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) + } /* end else */ -done: /* Write the local value of no-collective-cause to the DXPL. */ if(H5P_set(dx_plist, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, &local_cause) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set local no collective cause property") @@ -252,6 +234,10 @@ done: if(H5P_set(dx_plist, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, &global_cause) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set global no collective cause property") + /* Set the return value, based on the global cause */ + ret_value = global_cause > 0 ? FALSE : TRUE; + +done: FUNC_LEAVE_NOAPI(ret_value) } /* H5D__mpio_opt_possible() */ @@ -946,15 +932,58 @@ if(H5DEBUG(D)) /* Obtain MPI derived datatype from all individual chunks */ for(u = 0; u < num_chunk; u++) { - /* Disk MPI derived datatype */ + hsize_t *permute_map = NULL; /* array that holds the mapping from the old, + out-of-order displacements to the in-order + displacements of the MPI datatypes of the + point selection of the file space */ + hbool_t is_permuted = FALSE; + + /* Obtain disk and memory MPI derived datatype */ + /* NOTE: The permute_map array can be allocated within H5S_mpio_space_type + * and will be fed into the next call to H5S_mpio_space_type + * where it will be freed. + */ if(H5S_mpio_space_type(chunk_addr_info_array[u].chunk_info.fspace, - type_info->src_type_size, &chunk_ftype[u], &chunk_mpi_file_counts[u], &(chunk_mft_is_derived_array[u])) < 0) + type_info->src_type_size, + &chunk_ftype[u], /* OUT: datatype created */ + &chunk_mpi_file_counts[u], /* OUT */ + &(chunk_mft_is_derived_array[u]), /* OUT */ + TRUE, /* this is a file space, + so permute the + datatype if the point + selections are out of + order */ + &permute_map,/* OUT: a map to indicate the + permutation of points + selected in case they + are out of order */ + &is_permuted /* OUT */) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI file type") - - /* Buffer MPI derived datatype */ + /* Sanity check */ + if(is_permuted) + HDassert(permute_map); if(H5S_mpio_space_type(chunk_addr_info_array[u].chunk_info.mspace, - type_info->dst_type_size, &chunk_mtype[u], &chunk_mpi_mem_counts[u], &(chunk_mbt_is_derived_array[u])) < 0) + type_info->dst_type_size, &chunk_mtype[u], + &chunk_mpi_mem_counts[u], + &(chunk_mbt_is_derived_array[u]), + FALSE, /* this is a memory + space, so if the file + space is not + permuted, there is no + need to permute the + datatype if the point + selections are out of + order*/ + &permute_map, /* IN: the permutation map + generated by the + file_space selection + and applied to the + memory selection */ + &is_permuted /* IN */) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI buf type") + /* Sanity check */ + if(is_permuted) + HDassert(!permute_map); /* Chunk address relative to the first chunk */ chunk_addr_info_array[u].chunk_addr -= ctg_store.contig.dset_addr; @@ -1309,12 +1338,51 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf if((file_space != NULL) && (mem_space != NULL)) { int mpi_file_count; /* Number of file "objects" to transfer */ + hsize_t *permute_map = NULL; /* array that holds the mapping from the old, + out-of-order displacements to the in-order + displacements of the MPI datatypes of the + point selection of the file space */ + hbool_t is_permuted = FALSE; /* Obtain disk and memory MPI derived datatype */ - if(H5S_mpio_space_type(file_space, type_info->src_type_size, &mpi_file_type, &mpi_file_count, &mft_is_derived) < 0) + /* NOTE: The permute_map array can be allocated within H5S_mpio_space_type + * and will be fed into the next call to H5S_mpio_space_type + * where it will be freed. + */ + if(H5S_mpio_space_type(file_space, type_info->src_type_size, + &mpi_file_type, &mpi_file_count, &mft_is_derived, /* OUT: datatype created */ + TRUE, /* this is a file space, so + permute the datatype if the + point selection is out of + order */ + &permute_map, /* OUT: a map to indicate + the permutation of + points selected in + case they are out of + order */ + &is_permuted /* OUT */) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI file type") - if(H5S_mpio_space_type(mem_space, type_info->src_type_size, &mpi_buf_type, &mpi_buf_count, &mbt_is_derived) < 0) + /* Sanity check */ + if(is_permuted) + HDassert(permute_map); + if(H5S_mpio_space_type(mem_space, type_info->src_type_size, + &mpi_buf_type, &mpi_buf_count, &mbt_is_derived, /* OUT: datatype created */ + FALSE, /* this is a memory space, so if + the file space is not + permuted, there is no need to + permute the datatype if the + point selections are out of + order*/ + &permute_map /* IN: the permutation map + generated by the + file_space selection + and applied to the + memory selection */, + &is_permuted /* IN */) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI buffer type") + /* Sanity check */ + if(is_permuted) + HDassert(!permute_map); } /* end if */ else { /* For non-selection, participate with a none MPI derived datatype, the count is 0. */ |