From 4766d282ff21bf7dad3852696b33869beff12415 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Wed, 21 Mar 2018 00:09:16 -0500 Subject: Corrections for parallel I/O & tests. --- src/H5CX.c | 1 + src/H5Dio.c | 9 ++++---- src/H5Dmpio.c | 69 +++++++++++++++++++------------------------------------ testpar/t_cache.c | 6 +++++ testpar/t_dset.c | 48 +++++++++++++++++++------------------- 5 files changed, 58 insertions(+), 75 deletions(-) diff --git a/src/H5CX.c b/src/H5CX.c index 0f98a15..dd1d74f 100644 --- a/src/H5CX.c +++ b/src/H5CX.c @@ -2723,6 +2723,7 @@ H5CX__pop_common(void) H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME, mpio_coll_chunk_link_num_true) H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME, mpio_coll_chunk_link_num_false) H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME, mpio_coll_chunk_multi_ratio_coll) + H5CX_SET_PROP(H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME, mpio_coll_chunk_multi_ratio_ind) #endif /* H5_HAVE_PARALLEL */ /* Pop the top context node from the stack */ diff --git a/src/H5Dio.c b/src/H5Dio.c index 334f18f..cd6b627 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -57,8 +57,7 @@ static herr_t H5D__typeinfo_init(const H5D_t *dset, hid_t mem_type_id, hbool_t do_write, H5D_type_info_t *type_info); #ifdef H5_HAVE_PARALLEL static herr_t H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, - const H5S_t *file_space, const H5S_t *mem_space, const H5D_type_info_t *type_info, - const H5D_chunk_map_t *fm); + const H5S_t *file_space, const H5S_t *mem_space, const H5D_type_info_t *type_info); #endif /* H5_HAVE_PARALLEL */ static herr_t H5D__typeinfo_term(const H5D_type_info_t *type_info); @@ -562,7 +561,7 @@ H5D__read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, #ifdef H5_HAVE_PARALLEL /* Adjust I/O info for any parallel I/O */ - if(H5D__ioinfo_adjust(&io_info, dataset, file_space, mem_space, &type_info, fm) < 0) + if(H5D__ioinfo_adjust(&io_info, dataset, file_space, mem_space, &type_info) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to adjust I/O info for parallel I/O") #endif /*H5_HAVE_PARALLEL*/ @@ -789,7 +788,7 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, #ifdef H5_HAVE_PARALLEL /* Adjust I/O info for any parallel I/O */ - if(H5D__ioinfo_adjust(&io_info, dataset, file_space, mem_space, &type_info, fm) < 0) + if(H5D__ioinfo_adjust(&io_info, dataset, file_space, mem_space, &type_info) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to adjust I/O info for parallel I/O") #endif /*H5_HAVE_PARALLEL*/ @@ -1091,7 +1090,7 @@ done: static herr_t H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, const H5S_t *file_space, const H5S_t *mem_space, - const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm) + const H5D_type_info_t *type_info) { herr_t ret_value = SUCCEED; /* Return value */ diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 669da68..eb7aad9 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -37,12 +37,13 @@ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* File access */ #include "H5FDprivate.h" /* File drivers */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5Oprivate.h" /* Object headers */ #include "H5Pprivate.h" /* Property lists */ #include "H5Sprivate.h" /* Dataspaces */ -#include "H5VMprivate.h" /* Vector */ +#include "H5VMprivate.h" /* Vector */ #ifdef H5_HAVE_PARALLEL @@ -77,13 +78,11 @@ */ /* Macros to represent different IO modes(NONE, Independent or collective)for multiple chunk IO case */ -#define H5D_CHUNK_IO_MODE_IND 0 #define H5D_CHUNK_IO_MODE_COL 1 /* Macros to represent the regularity of the selection for multiple chunk IO case. */ #define H5D_CHUNK_SELECT_REG 1 -#define H5D_CHUNK_SELECT_IRREG 2 -#define H5D_CHUNK_SELECT_NONE 0 + /******************/ /* Local Typedefs */ @@ -225,8 +224,6 @@ static herr_t H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, H5D_chunk_addr_info_t chunk_addr_info_array[], int many_chunk_opt); static herr_t H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[], haddr_t chunk_addr[]); -static herr_t H5D__mpio_get_min_chunk(const H5D_io_info_t *io_info, - const H5D_chunk_map_t *fm, int *min_chunkf); static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, int *sum_chunkf); static herr_t H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, @@ -280,8 +277,8 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, const H5S_t *mem_space, const H5D_type_info_t *type_info) { H5FD_mpio_xfer_t io_xfer_mode; /* MPI I/O transfer mode */ - int local_cause = 0; /* Local reason(s) for breaking collective mode */ - int global_cause = 0; /* Global reason(s) for breaking collective mode */ + unsigned local_cause = 0; /* Local reason(s) for breaking collective mode */ + unsigned global_cause = 0; /* Global reason(s) for breaking collective mode */ htri_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_PACKAGE @@ -342,7 +339,7 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, /* Form consensus opinion among all processes about whether to perform * collective I/O */ - if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_INT, MPI_BOR, io_info->comm))) + if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_UNSIGNED, MPI_BOR, io_info->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) } /* end else */ @@ -525,41 +522,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5D__mpio_get_min_chunk - * - * Purpose: Routine for obtaining minimum number of chunks to cover - * hyperslab selection selected by all processors. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: Muqun Yang - * Monday, Feb. 13th, 2006 - * - *------------------------------------------------------------------------- - */ -static herr_t -H5D__mpio_get_min_chunk(const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, - int *min_chunkf) -{ - int num_chunkf; /* Number of chunks to iterate over */ - int mpi_code; /* MPI return code */ - herr_t ret_value = SUCCEED; - - FUNC_ENTER_STATIC - - /* Get the number of chunks to perform I/O on */ - H5_CHECKED_ASSIGN(num_chunkf, int, H5SL_count(fm->sel_chunks), size_t) - - /* Determine the minimum # of chunks for all processes */ - if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&num_chunkf, min_chunkf, 1, MPI_INT, MPI_MIN, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__mpio_get_min_chunk() */ - - -/*------------------------------------------------------------------------- * Function: H5D__mpio_get_sum_chunk * * Purpose: Routine for obtaining total number of chunks to cover @@ -3070,19 +3032,34 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk * read from the file and unfiltered. */ if(!chunk_entry->full_overwrite || io_info->op_type == H5D_IO_OP_READ) { + H5FD_mpio_xfer_t xfer_mode; /* Parallel transfer for this request */ + chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length; /* Currently, these chunk reads are done independently and will likely * cause issues with collective metadata reads enabled. In the future, * this should be refactored to use collective chunk reads - JTH */ + + /* Get the original state of parallel I/O transfer mode */ + if(H5CX_get_io_xfer_mode(&xfer_mode) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + + /* Change the xfer_mode to independent for handling the I/O */ + if(H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") + if(H5F_block_read(io_info->dset->oloc.file, H5FD_MEM_DRAW, chunk_entry->chunk_states.chunk_current.offset, chunk_entry->chunk_states.new_chunk.length, chunk_entry->buf) < 0) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "unable to read raw data chunk") + + /* Return to the original I/O transfer mode setting */ + if(H5CX_set_io_xfer_mode(xfer_mode) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") if(H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE, &filter_mask, err_detect, filter_cb, (size_t *)&chunk_entry->chunk_states.new_chunk.length, &buf_size, &chunk_entry->buf) < 0) - HGOTO_ERROR(H5E_PLINE, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying") + HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying") } /* end if */ else { chunk_entry->chunk_states.new_chunk.length = true_chunk_size; diff --git a/testpar/t_cache.c b/testpar/t_cache.c index 63b6ac7..caa578e 100644 --- a/testpar/t_cache.c +++ b/testpar/t_cache.c @@ -4075,6 +4075,9 @@ setup_cache_for_test(hid_t * fid_ptr, fid = H5Fcreate(filenames[0], H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + /* Push API context */ + H5CX_push(); + if ( fid < 0 ) { nerrors++; if ( verbose ) { @@ -4556,6 +4559,9 @@ take_down_cache(hid_t fid, H5C_t * cache_ptr) } + /* Pop API context */ + H5CX_pop(); + if ( success ) { if ( world_mpi_rank == world_server_mpi_rank ) { diff --git a/testpar/t_dset.c b/testpar/t_dset.c index 65d1bb4..b315772 100644 --- a/testpar/t_dset.c +++ b/testpar/t_dset.c @@ -679,8 +679,8 @@ dataset_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -748,8 +748,8 @@ dataset_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -819,8 +819,8 @@ dataset_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -884,8 +884,8 @@ dataset_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } /* write data collectively */ @@ -1184,8 +1184,8 @@ dataset_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -1252,8 +1252,8 @@ dataset_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -1335,8 +1335,8 @@ dataset_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pcreate xfer succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } /* read data collectively */ @@ -2200,8 +2200,8 @@ extend_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -2240,8 +2240,8 @@ extend_writeAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -2414,8 +2414,8 @@ extend_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -2461,8 +2461,8 @@ extend_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } @@ -2634,8 +2634,8 @@ compress_readAll(void) ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { - ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); - VRFY((ret>= 0),"set independent IO collectively succeeded"); + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); } -- cgit v0.12