diff options
author | vchoi-hdfgroup <55293060+vchoi-hdfgroup@users.noreply.github.com> | 2023-08-07 03:12:07 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-08-07 03:12:07 (GMT) |
commit | 291b2f7ae42958aa60b84a67a93eeca563a1d089 (patch) | |
tree | dc3536c6d779aadd4ca23bb7890d2751844d0780 /src/H5FDmpio.c | |
parent | fb38a8b9249b9b01d71e706cac9c044e6f25d681 (diff) | |
download | hdf5-291b2f7ae42958aa60b84a67a93eeca563a1d089.zip hdf5-291b2f7ae42958aa60b84a67a93eeca563a1d089.tar.gz hdf5-291b2f7ae42958aa60b84a67a93eeca563a1d089.tar.bz2 |
Implementation of the mpio driver with selection I/O. (#3222)
* This changes the default selection I/O to on for MPIO.
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- | src/H5FDmpio.c | 1026 |
1 files changed, 981 insertions, 45 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index a76b183..aa83be4 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -88,11 +88,20 @@ static herr_t H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_i static herr_t H5FD__mpio_write_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], const void *bufs[]); -static herr_t H5FD__mpio_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); -static herr_t H5FD__mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); -static herr_t H5FD__mpio_delete(const char *filename, hid_t fapl_id); -static herr_t H5FD__mpio_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input, - void **output); + +static herr_t H5FD__mpio_read_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, + size_t count, hid_t mem_space_ids[], hid_t file_space_ids[], + haddr_t offsets[], size_t element_sizes[], void *bufs[]); + +static herr_t H5FD__mpio_write_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, + size_t count, hid_t mem_space_ids[], hid_t file_space_ids[], + haddr_t offsets[], size_t element_sizes[], const void *bufs[]); + +static herr_t H5FD__mpio_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); +static herr_t H5FD__mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); +static herr_t H5FD__mpio_delete(const char *filename, hid_t fapl_id); +static herr_t H5FD__mpio_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input, + void **output); /* Other functions */ static herr_t H5FD__mpio_vector_build_types( @@ -101,48 +110,55 @@ static herr_t H5FD__mpio_vector_build_types( MPI_Offset *mpi_off, H5_flexible_const_ptr_t *mpi_bufs_base, int *size_i, MPI_Datatype *buf_type, hbool_t *buf_type_created, MPI_Datatype *file_type, hbool_t *file_type_created, char *unused); +static herr_t H5FD__selection_build_types(hbool_t io_op_write, size_t num_pieces, H5_flexible_const_ptr_t mbb, + H5S_t **file_spaces, H5S_t **mem_spaces, haddr_t offsets[], + H5_flexible_const_ptr_t bufs[], size_t src_element_sizes[], + size_t dst_element_sizes[], MPI_Datatype *final_ftype, + hbool_t *final_ftype_is_derived, MPI_Datatype *final_mtype, + hbool_t *final_mtype_is_derived); + /* The MPIO file driver information */ static const H5FD_class_t H5FD_mpio_g = { - H5FD_CLASS_VERSION, /* struct version */ - H5_VFD_MPIO, /* value */ - "mpio", /* name */ - HADDR_MAX, /* maxaddr */ - H5F_CLOSE_SEMI, /* fc_degree */ - H5FD__mpio_term, /* terminate */ - NULL, /* sb_size */ - NULL, /* sb_encode */ - NULL, /* sb_decode */ - 0, /* fapl_size */ - NULL, /* fapl_get */ - NULL, /* fapl_copy */ - NULL, /* fapl_free */ - 0, /* dxpl_size */ - NULL, /* dxpl_copy */ - NULL, /* dxpl_free */ - H5FD__mpio_open, /* open */ - H5FD__mpio_close, /* close */ - NULL, /* cmp */ - H5FD__mpio_query, /* query */ - NULL, /* get_type_map */ - NULL, /* alloc */ - NULL, /* free */ - H5FD__mpio_get_eoa, /* get_eoa */ - H5FD__mpio_set_eoa, /* set_eoa */ - H5FD__mpio_get_eof, /* get_eof */ - H5FD__mpio_get_handle, /* get_handle */ - H5FD__mpio_read, /* read */ - H5FD__mpio_write, /* write */ - H5FD__mpio_read_vector, /* read_vector */ - H5FD__mpio_write_vector, /* write_vector */ - NULL, /* read_selection */ - NULL, /* write_selection */ - H5FD__mpio_flush, /* flush */ - H5FD__mpio_truncate, /* truncate */ - NULL, /* lock */ - NULL, /* unlock */ - H5FD__mpio_delete, /* del */ - H5FD__mpio_ctl, /* ctl */ - H5FD_FLMAP_DICHOTOMY /* fl_map */ + H5FD_CLASS_VERSION, /* struct version */ + H5_VFD_MPIO, /* value */ + "mpio", /* name */ + HADDR_MAX, /* maxaddr */ + H5F_CLOSE_SEMI, /* fc_degree */ + H5FD__mpio_term, /* terminate */ + NULL, /* sb_size */ + NULL, /* sb_encode */ + NULL, /* sb_decode */ + 0, /* fapl_size */ + NULL, /* fapl_get */ + NULL, /* fapl_copy */ + NULL, /* fapl_free */ + 0, /* dxpl_size */ + NULL, /* dxpl_copy */ + NULL, /* dxpl_free */ + H5FD__mpio_open, /* open */ + H5FD__mpio_close, /* close */ + NULL, /* cmp */ + H5FD__mpio_query, /* query */ + NULL, /* get_type_map */ + NULL, /* alloc */ + NULL, /* free */ + H5FD__mpio_get_eoa, /* get_eoa */ + H5FD__mpio_set_eoa, /* set_eoa */ + H5FD__mpio_get_eof, /* get_eof */ + H5FD__mpio_get_handle, /* get_handle */ + H5FD__mpio_read, /* read */ + H5FD__mpio_write, /* write */ + H5FD__mpio_read_vector, /* read_vector */ + H5FD__mpio_write_vector, /* write_vector */ + H5FD__mpio_read_selection, /* read_selection */ + H5FD__mpio_write_selection, /* write_selection */ + H5FD__mpio_flush, /* flush */ + H5FD__mpio_truncate, /* truncate */ + NULL, /* lock */ + NULL, /* unlock */ + H5FD__mpio_delete, /* del */ + H5FD__mpio_ctl, /* ctl */ + H5FD_FLMAP_DICHOTOMY /* fl_map */ }; #ifdef H5FDmpio_DEBUG @@ -2717,6 +2733,926 @@ done: } /* end H5FD__mpio_write_vector() */ /*------------------------------------------------------------------------- + * Function: H5FD__selection_build_types + * + * Purpose: Build MPI derived datatype for each piece and then + * build MPI final derived datatype for file and memory. + * + * Note: This is derived from H5D__link_piece_collective_io() in + * src/H5Dmpio.c. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__selection_build_types(hbool_t io_op_write, size_t num_pieces, H5_flexible_const_ptr_t mbb, + H5S_t **file_spaces, H5S_t **mem_spaces, haddr_t offsets[], + H5_flexible_const_ptr_t bufs[], size_t src_element_sizes[], + size_t dst_element_sizes[], MPI_Datatype *final_ftype, + hbool_t *final_ftype_is_derived, MPI_Datatype *final_mtype, + hbool_t *final_mtype_is_derived) +{ + + MPI_Datatype *piece_mtype = NULL; + MPI_Datatype *piece_ftype = NULL; + MPI_Aint *piece_file_disp_array = NULL; + MPI_Aint *piece_mem_disp_array = NULL; + hbool_t *piece_mft_is_derived_array = + NULL; /* Flags to indicate each piece's MPI file datatype is derived */ + ; + hbool_t *piece_mmt_is_derived_array = + NULL; /* Flags to indicate each piece's MPI memory datatype is derived */ + int *piece_mpi_file_counts = NULL; /* Count of MPI file datatype for each piece */ + int *piece_mpi_mem_counts = NULL; /* Count of MPI memory datatype for each piece */ + + haddr_t base_file_addr; + size_t i; /* Local index variable */ + int mpi_code; /* MPI return code */ + + hbool_t extend_src_sizes = FALSE; + hbool_t extend_dst_sizes = FALSE; + hbool_t extend_bufs = FALSE; + H5_flexible_const_ptr_t buf; + size_t src_element_size, dst_element_size; + + herr_t ret_value = SUCCEED; + + FUNC_ENTER_PACKAGE + + /* Allocate information for num_pieces */ + if (NULL == (piece_mtype = (MPI_Datatype *)H5MM_malloc(num_pieces * sizeof(MPI_Datatype)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece memory datatype buffer"); + if (NULL == (piece_ftype = (MPI_Datatype *)H5MM_malloc(num_pieces * sizeof(MPI_Datatype)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece file datatype buffer"); + if (NULL == (piece_file_disp_array = (MPI_Aint *)H5MM_malloc(num_pieces * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece file displacement buffer"); + if (NULL == (piece_mem_disp_array = (MPI_Aint *)H5MM_calloc(num_pieces * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece memory displacement buffer"); + if (NULL == (piece_mpi_mem_counts = (int *)H5MM_calloc(num_pieces * sizeof(int)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece memory counts buffer"); + if (NULL == (piece_mpi_file_counts = (int *)H5MM_calloc(num_pieces * sizeof(int)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate piece file counts buffer"); + if (NULL == (piece_mmt_is_derived_array = (hbool_t *)H5MM_calloc(num_pieces * sizeof(hbool_t)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't allocate piece memory is derived datatype flags buffer"); + if (NULL == (piece_mft_is_derived_array = (hbool_t *)H5MM_calloc(num_pieces * sizeof(hbool_t)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't allocate piece file is derived datatype flags buffer"); + + /* save lowest file address */ + base_file_addr = offsets[0]; + + /* Obtain MPI derived datatype from all individual pieces */ + /* Iterate over selected pieces for this process */ + for (i = 0; i < num_pieces; i++) { + hsize_t *permute_map = NULL; /* array that holds the mapping from the old, + out-of-order displacements to the in-order + displacements of the MPI datatypes of the + point selection of the file space */ + hbool_t is_permuted = FALSE; + + if (!extend_src_sizes) { + if (src_element_sizes[i] == 0) { + extend_src_sizes = TRUE; + src_element_size = src_element_sizes[i - 1]; + } + else + src_element_size = src_element_sizes[i]; + } + + if (!extend_dst_sizes) { + if (dst_element_sizes[i] == 0) { + extend_dst_sizes = TRUE; + dst_element_size = dst_element_sizes[i - 1]; + } + else + dst_element_size = src_element_sizes[i]; + } + + if (!extend_bufs) { + if (bufs[i].cvp == NULL) { + extend_bufs = TRUE; + buf = bufs[i - 1]; + } + else + buf = bufs[i]; + } + + /* Obtain disk and memory MPI derived datatype */ + /* NOTE: The permute_map array can be allocated within H5S_mpio_space_type + * and will be fed into the next call to H5S_mpio_space_type + * where it will be freed. + */ + if (H5S_mpio_space_type(file_spaces[i], src_element_size, &piece_ftype[i], /* OUT: datatype created */ + &piece_mpi_file_counts[i], /* OUT */ + &(piece_mft_is_derived_array[i]), /* OUT */ + TRUE, /* this is a file space, + so permute the + datatype if the point + selections are out of + order */ + &permute_map, /* OUT: a map to indicate the + permutation of points + selected in case they + are out of order */ + &is_permuted /* OUT */) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI file type"); + + /* Sanity check */ + if (is_permuted) + assert(permute_map); + + if (H5S_mpio_space_type(mem_spaces[i], dst_element_size, &piece_mtype[i], &piece_mpi_mem_counts[i], + &(piece_mmt_is_derived_array[i]), FALSE, /* this is a memory + space, so if the file + space is not + permuted, there is no + need to permute the + datatype if the point + selections are out of + order*/ + &permute_map, /* IN: the permutation map + generated by the + file_space selection + and applied to the + memory selection */ + &is_permuted /* IN */) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create MPI buf type"); + + /* Sanity check */ + if (is_permuted) + assert(!permute_map); + + /* Piece address relative to the first piece addr + * Assign piece address to MPI displacement + * (assume MPI_Aint big enough to hold it) */ + piece_file_disp_array[i] = (MPI_Aint)offsets[i] - (MPI_Aint)base_file_addr; + + if (io_op_write) { + piece_mem_disp_array[i] = (MPI_Aint)buf.cvp - (MPI_Aint)mbb.cvp; + } + else { + piece_mem_disp_array[i] = (MPI_Aint)buf.vp - (MPI_Aint)mbb.vp; + } + } /* end for */ + + /* Create final MPI derived datatype for the file */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)num_pieces, piece_mpi_file_counts, + piece_file_disp_array, piece_ftype, final_ftype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code); + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(final_ftype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code); + *final_ftype_is_derived = TRUE; + + /* Create final MPI derived datatype for memory */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)num_pieces, piece_mpi_mem_counts, + piece_mem_disp_array, piece_mtype, final_mtype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code); + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(final_mtype))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code); + *final_mtype_is_derived = TRUE; + + /* Free the file & memory MPI datatypes for each piece */ + for (i = 0; i < num_pieces; i++) { + if (piece_mmt_is_derived_array[i]) + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(piece_mtype + i))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + + if (piece_mft_is_derived_array[i]) + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(piece_ftype + i))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + } /* end for */ + +done: + + /* Release resources */ + if (piece_mtype) + H5MM_xfree(piece_mtype); + if (piece_ftype) + H5MM_xfree(piece_ftype); + if (piece_file_disp_array) + H5MM_xfree(piece_file_disp_array); + if (piece_mem_disp_array) + H5MM_xfree(piece_mem_disp_array); + if (piece_mpi_mem_counts) + H5MM_xfree(piece_mpi_mem_counts); + if (piece_mpi_file_counts) + H5MM_xfree(piece_mpi_file_counts); + if (piece_mmt_is_derived_array) + H5MM_xfree(piece_mmt_is_derived_array); + if (piece_mft_is_derived_array) + H5MM_xfree(piece_mft_is_derived_array); + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD__selection_build_types() */ + +/*------------------------------------------------------------------------- + * Function: H5FD__mpio_read_selection + * + * Purpose: The behaviour of this function dependes on the value of + * the transfer mode obtained from the context. + * + * If the transfer mode is H5FD_MPIO_COLLECTIVE: + * --sort the selections + * --set mpi_bufs_base + * --build the MPI derived types + * --perform MPI_File_set_view() + * --perform MPI_File_read_at_all() or MPI_File_read_at() + * depending on whether this is a H5FD_MPIO_COLLECTIVE_IO + * + * If this is not H5FD_MPIO_COLLECTIVE: + * --undo possible base address addition in internal routines + * --call H5FD_read_vector_from_selection() to perform vector + * or scalar writes for the selections + * + * Return: Success: SUCCEED. + * Failure: FAIL. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__mpio_read_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, size_t count, + hid_t mem_space_ids[], hid_t file_space_ids[], haddr_t offsets[], + size_t element_sizes[], void *bufs[] /* out */) +{ + H5FD_mpio_t *file = (H5FD_mpio_t *)_file; + MPI_Offset mpi_off; + MPI_Status mpi_stat; /* Status from I/O operation */ + int size_i; /* Integer copy of 'size' to read */ + + H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */ + H5FD_mpio_collective_opt_t coll_opt_mode; + + MPI_Datatype final_mtype; /* Final memory MPI datatype for all pieces with selection */ + hbool_t final_mtype_is_derived = FALSE; + + MPI_Datatype final_ftype; /* Final file MPI datatype for all pieces with selection */ + hbool_t final_ftype_is_derived = FALSE; + + hid_t *s_mem_space_ids = NULL; + hid_t *s_file_space_ids = NULL; + haddr_t *s_offsets = NULL; + size_t *s_element_sizes = NULL; + H5_flexible_const_ptr_t *s_bufs = NULL; + hbool_t selection_was_sorted = TRUE; + + uint32_t i, j; + H5S_t **s_mem_spaces = NULL; + H5S_t **s_file_spaces = NULL; + haddr_t tmp_offset = 0; + void *mpi_bufs_base = NULL; + char unused = 0; /* Unused, except for non-NULL pointer value */ + +#if H5_CHECK_MPI_VERSION(3, 0) + MPI_Count bytes_read = 0; /* Number of bytes read in */ + MPI_Count type_size; /* MPI datatype used for I/O's size */ + MPI_Count io_size; /* Actual number of bytes requested */ + MPI_Count n; +#else + int bytes_read = 0; /* Number of bytes read in */ + int type_size; /* MPI datatype used for I/O's size */ + int io_size; /* Actual number of bytes requested */ + int n; +#endif + hbool_t rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */ +#ifdef H5FDmpio_DEBUG + hbool_t H5FD_mpio_debug_t_flag = (H5FD_mpio_debug_flags_s[(int)'t'] && H5FD_MPIO_TRACE_THIS_RANK(file)); + hbool_t H5FD_mpio_debug_r_flag = (H5FD_mpio_debug_flags_s[(int)'r'] && H5FD_MPIO_TRACE_THIS_RANK(file)); +#endif + int mpi_code; /* MPI return code */ + H5_flexible_const_ptr_t mbb; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_PACKAGE + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + fprintf(stderr, "%s: (%d) Entering\n", __func__, file->mpi_rank); +#endif + + /* Sanity checks */ + assert(file); + assert(H5FD_MPIO == file->pub.driver_id); + assert((count == 0) || (mem_space_ids)); + assert((count == 0) || (file_space_ids)); + assert((count == 0) || (offsets)); + assert((count == 0) || (element_sizes)); + assert((count == 0) || (bufs)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + assert((count == 0) || (element_sizes[0] != 0)); + assert((count == 0) || (bufs[0] != NULL)); + + /* Portably initialize MPI status variable */ + memset(&mpi_stat, 0, sizeof(MPI_Status)); + + /* Get the transfer mode from the API context */ + if (H5CX_get_io_xfer_mode(&xfer_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode"); + + /* + * Set up for a fancy xfer using complex types, or single byte block. We + * wouldn't need to rely on the use_view field if MPI semantics allowed + * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which + * could mean "use MPI_BYTE" by convention). + */ + if (xfer_mode == H5FD_MPIO_COLLECTIVE) { + + if (count) { + if (H5FD_sort_selection_io_req(&selection_was_sorted, count, mem_space_ids, file_space_ids, + offsets, element_sizes, (H5_flexible_const_ptr_t *)bufs, + &s_mem_space_ids, &s_file_space_ids, &s_offsets, &s_element_sizes, + &s_bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't sort selection I/O request"); + + tmp_offset = s_offsets[0]; + + if (NULL == (s_file_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for file space list"); + if (NULL == (s_mem_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for memory space list"); + + for (i = 0; i < count; i++) { + if (NULL == (s_mem_spaces[i] = (H5S_t *)H5I_object_verify(s_mem_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, + "can't retrieve memory dataspace from ID"); + if (NULL == + (s_file_spaces[i] = (H5S_t *)H5I_object_verify(s_file_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, + "can't retrieve file dataspace from ID"); + } + + /* when we setup mpi_bufs[] below, all addresses are offsets from + * mpi_bufs_base. + * + * Since these offsets must all be positive, we must scan through + * s_bufs[] to find the smallest value, and choose that for + * mpi_bufs_base. + */ + j = 0; /* guess at the index of the smallest value of s_bufs[] */ + + if (s_bufs[j + 1].vp != NULL) { + for (i = 1; i < count; i++) + if (s_bufs[i].vp < s_bufs[j].vp) + j = i; + } + + mpi_bufs_base = s_bufs[j].vp; + mbb.vp = mpi_bufs_base; + + if (H5FD__selection_build_types(FALSE, count, mbb, s_file_spaces, s_mem_spaces, s_offsets, s_bufs, + s_element_sizes, s_element_sizes, &final_ftype, + &final_ftype_is_derived, &final_mtype, + &final_mtype_is_derived) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't build type for MPI-IO"); + + /* We have a single, complicated MPI datatype for both memory & file */ + size_i = 1; + } + else { + + /* No chunks selected for this process */ + size_i = 0; + + mpi_bufs_base = &unused; + + /* Set the MPI datatype */ + final_ftype = MPI_BYTE; + final_mtype = MPI_BYTE; + } + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff(tmp_offset, &mpi_off /*out*/) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off"); + + /* + * Set the file view when we are using MPI derived types + */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, final_ftype, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the read to zero + */ + /* Reset mpi_off to 0 since the view now starts at the data offset */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0"); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) using MPIO collective mode\n", __func__, file->mpi_rank); +#endif + /* Get the collective_opt property to check whether the application wants to do IO individually. */ + if (H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property"); + + if (coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) doing MPI collective IO\n", __func__, file->mpi_rank); +#endif + /* Check whether we should read from rank 0 and broadcast to other ranks */ + if (H5CX_get_mpio_rank0_bcast()) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) doing read-rank0-and-MPI_Bcast\n", __func__, file->mpi_rank); +#endif + /* Indicate path we've taken */ + rank0_bcast = TRUE; + + /* Read on rank 0 Bcast to other ranks */ + if (file->mpi_rank == 0) { + /* If MPI_File_read_at fails, push an error, but continue + * to participate in following MPI_Bcast */ + if (MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, mpi_bufs_base, size_i, + final_mtype, &mpi_stat))) + HMPI_DONE_ERROR(FAIL, "MPI_File_read_at failed", mpi_code); + } + + if (MPI_SUCCESS != (mpi_code = MPI_Bcast(mpi_bufs_base, size_i, final_mtype, 0, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); + } /* end if */ + else + /* Perform collective read operation */ + if (MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, mpi_bufs_base, size_i, + final_mtype, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code); + } /* end if */ + else { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); +#endif + + /* Perform independent read operation */ + if (MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, mpi_bufs_base, size_i, + final_mtype, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code); + } /* end else */ + + /* + * Reset the file view when we used MPI derived types + */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + + /* Only retrieve bytes read if this rank _actually_ participated in I/O */ + if (!rank0_bcast || (rank0_bcast && file->mpi_rank == 0)) { + /* How many bytes were actually read? */ +#if H5_CHECK_MPI_VERSION(3, 0) + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, final_mtype, &bytes_read))) { +#else + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) { +#endif + if (rank0_bcast && file->mpi_rank == 0) { + /* If MPI_Get_elements(_x) fails for a rank 0 bcast strategy, + * push an error, but continue to participate in the following + * MPI_Bcast. + */ + bytes_read = -1; + HMPI_DONE_ERROR(FAIL, "MPI_Get_elements failed", mpi_code); + } + else + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code); + } + } /* end if */ + + /* If the rank0-bcast feature was used, broadcast the # of bytes read to + * other ranks, which didn't perform any I/O. + */ + /* NOTE: This could be optimized further to be combined with the broadcast + * of the data. (QAK - 2019/1/2) + */ + if (rank0_bcast) +#if H5_CHECK_MPI_VERSION(3, 0) + if (MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_COUNT, 0, file->comm)) +#else + if (MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_INT, 0, file->comm)) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0); + + /* Get the type's size */ +#if H5_CHECK_MPI_VERSION(3, 0) + if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(final_mtype, &type_size))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Type_size(final_mtype, &type_size))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code); + + /* Compute the actual number of bytes requested */ + io_size = type_size * size_i; + + /* Check for read failure */ + if (bytes_read < 0 || bytes_read > io_size) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed"); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) mpi_off = %ld bytes_read = %lld type = %s\n", __func__, + file->mpi_rank, (long)mpi_off, (long long)bytes_read, H5FD__mem_t_to_str(type)); +#endif + + /* + * This gives us zeroes beyond end of physical MPI file. + */ + if ((n = (io_size - bytes_read)) > 0) + memset((char *)bufs[0] + bytes_read, 0, (size_t)n); + + } /* end if */ + else { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + fprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); +#endif + if (_file->base_addr > 0) { + /* Undo base address addition in internal routines before passing down to the mpio driver */ + for (i = 0; i < count; i++) { + assert(offsets[i] >= _file->base_addr); + offsets[i] -= _file->base_addr; + } + } + + if (H5FD_read_from_selection(_file, type, (uint32_t)count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "read vector from selection failed"); + } + +done: + /* Free the MPI buf and file types, if they were derived */ + if (final_mtype_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&final_mtype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + if (final_ftype_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&final_ftype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + + /* Cleanup dataspace arrays */ + if (s_mem_spaces) + s_mem_spaces = H5MM_xfree(s_mem_spaces); + if (s_file_spaces) + s_file_spaces = H5MM_xfree(s_file_spaces); + + if (!selection_was_sorted) { + free(s_mem_space_ids); + s_mem_space_ids = NULL; + free(s_file_space_ids); + s_file_space_ids = NULL; + free(s_offsets); + s_offsets = NULL; + free(s_element_sizes); + s_element_sizes = NULL; + free(s_bufs); + s_bufs = NULL; + } + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + fprintf(stderr, "%s: (%d) Leaving\n", __func__, file->mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD__mpio_read_selection() */ + +/*------------------------------------------------------------------------- + * Function: H5FD__mpio_write_selection + * + * Purpose: The behaviour of this function dependes on the value of + * the transfer mode obtained from the context. + * + * If the transfer mode is H5FD_MPIO_COLLECTIVE: + * --sort the selections + * --set mpi_bufs_base + * --build the MPI derived types + * --perform MPI_File_set_view() + * --perform MPI_File_write_at_all() or MPI_File_write_at() + * depending on whether this is a H5FD_MPIO_COLLECTIVE_IO + * --calculate and set the file's eof for the bytes written + * + * If this is not H5FD_MPIO_COLLECTIVE: + * --undo possible base address addition in internal routines + * --call H5FD_write_vector_from_selection() to perform vector + * or scalar writes for the selections + * + * Return: Success: SUCCEED. + * Failure: FAIL. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__mpio_write_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, size_t count, + hid_t mem_space_ids[], hid_t file_space_ids[], haddr_t offsets[], + size_t element_sizes[], const void *bufs[]) +{ + H5FD_mpio_t *file = (H5FD_mpio_t *)_file; + MPI_Offset mpi_off; + MPI_Offset save_mpi_off; /* Use at the end of the routine for setting local_eof */ + MPI_Status mpi_stat; /* Status from I/O operation */ + + int size_i; + H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */ + H5FD_mpio_collective_opt_t coll_opt_mode; + + MPI_Datatype final_mtype; /* Final memory MPI datatype for all pieces with selection */ + hbool_t final_mtype_is_derived = FALSE; + + MPI_Datatype final_ftype; /* Final file MPI datatype for all pieces with selection */ + hbool_t final_ftype_is_derived = FALSE; + + hid_t *s_mem_space_ids = NULL; + hid_t *s_file_space_ids = NULL; + haddr_t *s_offsets = NULL; + size_t *s_element_sizes = NULL; + H5_flexible_const_ptr_t *s_bufs = NULL; + hbool_t selection_was_sorted = TRUE; + const void *mpi_bufs_base = NULL; + + uint32_t i, j; + H5S_t **s_mem_spaces = NULL; + H5S_t **s_file_spaces = NULL; + haddr_t tmp_offset = 0; + char unused = 0; /* Unused, except for non-NULL pointer value */ + H5_flexible_const_ptr_t mbb; + +#if H5_CHECK_MPI_VERSION(3, 0) + MPI_Count bytes_written; + MPI_Count type_size; /* MPI datatype used for I/O's size */ + MPI_Count io_size; /* Actual number of bytes requested */ +#else + int bytes_written; + int type_size; /* MPI datatype used for I/O's size */ + int io_size; /* Actual number of bytes requested */ +#endif + +#ifdef H5FDmpio_DEBUG + hbool_t H5FD_mpio_debug_t_flag = (H5FD_mpio_debug_flags_s[(int)'t'] && H5FD_MPIO_TRACE_THIS_RANK(file)); + hbool_t H5FD_mpio_debug_w_flag = (H5FD_mpio_debug_flags_s[(int)'w'] && H5FD_MPIO_TRACE_THIS_RANK(file)); +#endif + int mpi_code; /* MPI return code */ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_PACKAGE + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + fprintf(stderr, "%s: (%d) Entering\n", __func__, file->mpi_rank); +#endif + + /* Sanity checks */ + assert(file); + assert(H5FD_MPIO == file->pub.driver_id); + assert((count == 0) || (mem_space_ids)); + assert((count == 0) || (file_space_ids)); + assert((count == 0) || (offsets)); + assert((count == 0) || (element_sizes)); + assert((count == 0) || (bufs)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + assert((count == 0) || (element_sizes[0] != 0)); + assert((count == 0) || (bufs[0] != NULL)); + + /* Verify that no data is written when between MPI_Barrier()s during file flush */ + assert(!H5CX_get_mpi_file_flushing()); + + /* Portably initialize MPI status variable */ + memset(&mpi_stat, 0, sizeof(MPI_Status)); + + /* Get the transfer mode from the API context */ + if (H5CX_get_io_xfer_mode(&xfer_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode"); + + if (xfer_mode == H5FD_MPIO_COLLECTIVE) { + + if (count) { + if (H5FD_sort_selection_io_req(&selection_was_sorted, count, mem_space_ids, file_space_ids, + offsets, element_sizes, (H5_flexible_const_ptr_t *)bufs, + &s_mem_space_ids, &s_file_space_ids, &s_offsets, &s_element_sizes, + &s_bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't sort selection I/O request"); + + tmp_offset = s_offsets[0]; + + if (NULL == (s_file_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for file space list"); + if (NULL == (s_mem_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for memory space list"); + + for (i = 0; i < count; i++) { + if (NULL == + (s_file_spaces[i] = (H5S_t *)H5I_object_verify(s_file_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, + "can't retrieve file dataspace from ID"); + if (NULL == (s_mem_spaces[i] = (H5S_t *)H5I_object_verify(s_mem_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, + "can't retrieve memory dataspace from ID"); + } + + /* when we setup mpi_bufs[] below, all addresses are offsets from + * mpi_bufs_base. + * + * Since these offsets must all be positive, we must scan through + * s_bufs[] to find the smallest value, and choose that for + * mpi_bufs_base. + */ + j = 0; /* guess at the index of the smallest value of s_bufs[] */ + + if (s_bufs[j + 1].cvp != NULL) { + for (i = 1; i < count; i++) + if (s_bufs[i].cvp < s_bufs[j].cvp) + j = i; + } + + mpi_bufs_base = s_bufs[j].cvp; + mbb.cvp = mpi_bufs_base; + + if (H5FD__selection_build_types(TRUE, count, mbb, s_file_spaces, s_mem_spaces, s_offsets, s_bufs, + s_element_sizes, s_element_sizes, &final_ftype, + &final_ftype_is_derived, &final_mtype, + &final_mtype_is_derived) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't build type for MPI-IO"); + + /* We have a single, complicated MPI datatype for both memory & file */ + size_i = 1; + } + else { + + /* No chunks selected for this process */ + size_i = 0; + + mpi_bufs_base = &unused; + + /* Set the MPI datatype */ + final_ftype = MPI_BYTE; + final_mtype = MPI_BYTE; + } + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff(tmp_offset, &mpi_off /*out*/) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off"); + + /* To be used at the end of the routine for setting local_eof */ + save_mpi_off = mpi_off; + + /* + * Set the file view when we are using MPI derived types + */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, final_ftype, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + + /* Reset mpi_off to 0 since the view now starts at the data offset */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0"); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + fprintf(stderr, "%s: (%d) using MPIO collective mode\n", __func__, file->mpi_rank); +#endif + + /* Get the collective_opt property to check whether the application wants to do IO individually. */ + if (H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property"); + + if (coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + fprintf(stderr, "%s: (%d) doing MPI collective IO\n", __func__, file->mpi_rank); +#endif + + /* Perform collective write operation */ + if (MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, mpi_bufs_base, size_i, + final_mtype, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code); + + /* Do MPI_File_sync when needed by underlying ROMIO driver */ + if (file->mpi_file_sync_required) { + if (MPI_SUCCESS != (mpi_code = MPI_File_sync(file->f))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code); + } + } + else { + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + fprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); +#endif + /* Perform independent write operation */ + if (MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, mpi_bufs_base, size_i, + final_mtype, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code); + } /* end else */ + + /* Reset the file view when we used MPI derived types */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + + /* How many bytes were actually written */ +#if H5_CHECK_MPI_VERSION(3, 0) + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, final_mtype, &bytes_written))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code); + + /* Get the type's size */ +#if H5_CHECK_MPI_VERSION(3, 0) + if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(final_mtype, &type_size))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Type_size(final_mtype, &type_size))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code); + + /* Compute the actual number of bytes requested */ + io_size = type_size * size_i; + + /* Check for write failure */ + if (bytes_written != io_size || bytes_written < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed"); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + fprintf(stderr, "%s: (%d) mpi_off = %ld bytes_written = %lld type = %s\n", __func__, + file->mpi_rank, (long)mpi_off, (long long)bytes_written, H5FD__mem_t_to_str(type)); +#endif + + /* Each process will keep track of its perceived EOF value locally, and + * ultimately we will reduce this value to the maximum amongst all + * processes, but until then keep the actual eof at HADDR_UNDEF just in + * case something bad happens before that point. (rather have a value + * we know is wrong sitting around rather than one that could only + * potentially be wrong.) */ + file->eof = HADDR_UNDEF; + + if (bytes_written && (((haddr_t)bytes_written + (haddr_t)save_mpi_off) > file->local_eof)) + file->local_eof = (haddr_t)save_mpi_off + (haddr_t)bytes_written; + } + else { /* Not H5FD_MPIO_COLLECTIVE */ + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + fprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); +#endif + if (_file->base_addr > 0) { + /* Undo base address addition in internal routines before passing down to the mpio driver */ + for (i = 0; i < count; i++) { + assert(offsets[i] >= _file->base_addr); + offsets[i] -= _file->base_addr; + } + } + + if (H5FD_write_from_selection(_file, type, (uint32_t)count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "write vector from selection failed"); + } + +done: + /* Free the MPI buf and file types, if they were derived */ + if (final_mtype_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&final_mtype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + if (final_ftype_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&final_ftype))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + + /* Cleanup dataspace arrays */ + if (s_mem_spaces) + s_mem_spaces = H5MM_xfree(s_mem_spaces); + if (s_file_spaces) + s_file_spaces = H5MM_xfree(s_file_spaces); + + if (!selection_was_sorted) { + free(s_mem_space_ids); + s_mem_space_ids = NULL; + free(s_file_space_ids); + s_file_space_ids = NULL; + free(s_offsets); + s_offsets = NULL; + free(s_element_sizes); + s_element_sizes = NULL; + free(s_bufs); + s_bufs = NULL; + } + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + fprintf(stderr, "%s: (%d) Leaving: ret_value = %d\n", __func__, file->mpi_rank, ret_value); +#endif + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD__mpio_write_selection() */ + +/*------------------------------------------------------------------------- * Function: H5FD__mpio_flush * * Purpose: Makes sure that all data is on disk. This is collective. |