-rw-r--r--   src/H5Dmpio.c           | 302
-rw-r--r--   src/H5Dprivate.h        |   2
-rw-r--r--   src/H5Ppublic.h         |   3
-rw-r--r--   src/H5trace.c           |   4
-rw-r--r--   testpar/t_coll_chunk.c  |  36
-rw-r--r--   testpar/t_dset.c        | 104
-rw-r--r--   testpar/testphdf5.c     |   2
-rw-r--r--   testpar/testphdf5.h     |   9
8 files changed, 117 insertions, 345 deletions
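Before the diff itself: after this change there are exactly two ways a dataset-transfer property list reaches the optimized multi-chunk path, a direct H5FD_MPIO_CHUNK_MULTI_IO request or the default num-threshold branch. A minimal sketch of both, assuming an MPI-enabled HDF5 build; make_multi_chunk_dxpl is a hypothetical helper and error checks are omitted:

#include <mpi.h>
#include <hdf5.h>

/* Sketch (not part of the patch): build a dxpl that reaches the
 * multi-chunk collective I/O path, either directly or via the
 * process-count threshold. Hypothetical helper; error checks omitted. */
static hid_t
make_multi_chunk_dxpl(hbool_t direct)
{
    hid_t dxpl = H5Pcreate(H5P_DATASET_XFER);

    H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);

    if(direct)
        /* Direct request: skip the num-threshold calculation entirely */
        H5Pset_dxpl_mpio_chunk_opt(dxpl, H5FD_MPIO_CHUNK_MULTI_IO);
    else {
        int mpi_size;

        /* Default path: make the one-link-chunk threshold unreachable by
         * demanding twice as many processes per chunk as the job has,
         * the same trick testpar/t_dset.c uses */
        MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
        H5Pset_dxpl_mpio_chunk_opt_num(dxpl, (unsigned)(2 * mpi_size));
    } /* end else */

    return dxpl;
}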
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index c2d964e..900ad6a 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -105,10 +105,9 @@ static herr_t H5D__chunk_collective_io(H5D_io_info_t *io_info,
 static herr_t H5D__multi_chunk_collective_io(H5D_io_info_t *io_info,
     const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, H5P_genplist_t *dx_plist);
-static herr_t H5D__multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,
-    const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, H5P_genplist_t *dx_plist);
 static herr_t H5D__link_chunk_collective_io(H5D_io_info_t *io_info,
-    const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, int sum_chunk);
+    const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, int sum_chunk,
+    H5P_genplist_t *dx_plist);
 static herr_t H5D__inter_collective_io(H5D_io_info_t *io_info,
     const H5D_type_info_t *type_info, const H5S_t *file_space,
     const H5S_t *mem_space);
@@ -586,6 +585,12 @@ done:
  * Programmer:  Muqun Yang
  *              Monday, Feb. 13th, 2006
  *
+ * Modification:
+ *  - Refactor to remove the multi-chunk-without-optimization feature and
+ *    update multi-chunk-io accordingly
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
+ *
  *-------------------------------------------------------------------------
  */
 static herr_t
@@ -594,8 +599,6 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
 {
     H5P_genplist_t *dx_plist;           /* Pointer to DXPL */
     H5FD_mpio_chunk_opt_t chunk_opt_mode;
-    H5D_mpio_actual_chunk_opt_mode_t actual_chunk_opt_mode;
-    H5D_mpio_actual_io_mode_t actual_io_mode;
     int         io_option = H5D_MULTI_CHUNK_IO_MORE_OPT;
     int         sum_chunk = -1;
 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
@@ -617,10 +620,12 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     /* Check the optional property list on what to do with collective chunk IO. */
     chunk_opt_mode = (H5FD_mpio_chunk_opt_t)H5P_peek_unsigned(dx_plist, H5D_XFER_MPIO_CHUNK_OPT_HARD_NAME);

-    if(chunk_opt_mode == H5FD_MPIO_CHUNK_ONE_IO)
+    if(H5FD_MPIO_CHUNK_ONE_IO == chunk_opt_mode)
         io_option = H5D_ONE_LINK_CHUNK_IO;      /*no opt*/
-    else if(chunk_opt_mode == H5FD_MPIO_CHUNK_MULTI_IO)
-        io_option = H5D_MULTI_CHUNK_IO;         /*no opt */
+    /* direct request to multi-chunk-io */
+    else if(H5FD_MPIO_CHUNK_MULTI_IO == chunk_opt_mode)
+        io_option = H5D_MULTI_CHUNK_IO;
+    /* via default path. branch by num threshold */
     else {
         unsigned one_link_chunk_io_threshold;   /* Threshhold to use single collective I/O for all chunks */
         int mpi_size;                   /* Number of processes in MPI job */
@@ -649,7 +654,7 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     /*** Test collective chunk user-input optimization APIs.
      ***/
     check_prop = H5Pexist(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_LINK_HARD_NAME);
     if(check_prop > 0) {
-        if(io_option == H5D_ONE_LINK_CHUNK_IO) {
+        if(H5D_ONE_LINK_CHUNK_IO == io_option) {
             new_value = 0;
             if(H5Pset(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_LINK_HARD_NAME, &new_value) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTSET, FAIL, "unable to set property value")
@@ -657,7 +662,7 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     } /* end if */
     check_prop = H5Pexist(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME);
     if(check_prop > 0) {
-        if(io_option == H5D_MULTI_CHUNK_IO) {
+        if(H5D_MULTI_CHUNK_IO == io_option) {
             new_value = 0;
             if(H5Pset(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME, &new_value) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTSET, FAIL, "unable to set property value")
@@ -665,7 +670,7 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     } /* end if */
     check_prop = H5Pexist(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME);
     if(check_prop > 0) {
-        if(io_option == H5D_ONE_LINK_CHUNK_IO_MORE_OPT) {
+        if(H5D_ONE_LINK_CHUNK_IO_MORE_OPT == io_option) {
             new_value = 0;
             if(H5Pset(io_info->dxpl_id, H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME, &new_value) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTSET, FAIL, "unable to set property value")
@@ -682,39 +687,16 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
 #endif

     /* step 2:  Go ahead to do IO.*/
-    if(io_option == H5D_ONE_LINK_CHUNK_IO || io_option == H5D_ONE_LINK_CHUNK_IO_MORE_OPT) {
-        /* set the actual io mode properties to the correct values for link chunk io.
-         * Link chunk I/O does not break to independent, so we can set the actual_io mode
-         * as well as the optimisation mode. */
-        actual_chunk_opt_mode = H5D_MPIO_LINK_CHUNK;
-        actual_io_mode = H5D_MPIO_CHUNK_COLLECTIVE;
-
-        /* Set the actual chunk opt mode property. */
-        if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME, &actual_chunk_opt_mode) < 0)
-            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual chunk opt mode property")
-
-        if(H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk) < 0)
+    if(H5D_ONE_LINK_CHUNK_IO == io_option || H5D_ONE_LINK_CHUNK_IO_MORE_OPT == io_option) {
+        if(H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk, dx_plist) < 0)
             HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish linked chunk MPI-IO")
-
-        /* Set the actual io mode property.
-         */
-        if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_IO_MODE_NAME, &actual_io_mode) < 0)
-            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual io mode property")
     } /* end if */
-    else
-        if(io_option == H5D_MULTI_CHUNK_IO) {
-            /* Set the actual chunk opt mode property */
-            actual_chunk_opt_mode = H5D_MPIO_MULTI_CHUNK_NO_OPT;
-            if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME, &actual_chunk_opt_mode) < 0)
-                HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual chunk opt mode property")
-
-            if(H5D__multi_chunk_collective_io_no_opt(io_info, type_info, fm, dx_plist) < 0)
-                HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish multiple chunk MPI-IO")
+    /* direct request to multi-chunk-io */
+    else if(H5D_MULTI_CHUNK_IO == io_option) {
+        if(H5D__multi_chunk_collective_io(io_info, type_info, fm, dx_plist) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish optimized multiple chunk MPI-IO")
     } /* end if */
-    else { /*multiple chunk IOs with opt */
-        actual_chunk_opt_mode = H5D_MPIO_MULTI_CHUNK;
-        if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME, &actual_chunk_opt_mode) < 0)
-            HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual chunk opt mode property")
-
+    else { /* multiple chunk IO via threshold */
         if(H5D__multi_chunk_collective_io(io_info, type_info, fm, dx_plist) < 0)
             HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish optimized multiple chunk MPI-IO")
     } /* end else */
@@ -801,11 +783,16 @@ done:
  * Programmer:  Muqun Yang
  *              Monday, Feb. 13th, 2006
  *
+ * Modification:
+ *  - Set the H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME and H5D_MPIO_ACTUAL_IO_MODE_NAME
+ *    DXPL properties in this function.
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
  *-------------------------------------------------------------------------
  */
 static herr_t
 H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-    H5D_chunk_map_t *fm, int sum_chunk)
+    H5D_chunk_map_t *fm, int sum_chunk, H5P_genplist_t *dx_plist)
 {
     H5D_chunk_addr_info_t *chunk_addr_info_array = NULL;
     MPI_Datatype chunk_final_mtype;         /* Final memory MPI datatype for all chunks with seletion */
@@ -824,10 +811,21 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
     int *chunk_mpi_file_counts = NULL;      /* Count of MPI file datatype for each chunk */
     int *chunk_mpi_mem_counts = NULL;       /* Count of MPI memory datatype for each chunk */
     int mpi_code;                           /* MPI return code */
+    H5D_mpio_actual_chunk_opt_mode_t actual_chunk_opt_mode = H5D_MPIO_LINK_CHUNK;
+    H5D_mpio_actual_io_mode_t actual_io_mode = H5D_MPIO_CHUNK_COLLECTIVE;
     herr_t ret_value = SUCCEED;

     FUNC_ENTER_STATIC

+    /* Set the actual-chunk-opt-mode property. */
+    if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME, &actual_chunk_opt_mode) < 0)
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual chunk opt mode property")
+
+    /* Set the actual-io-mode property.
+     * Link chunk I/O does not break to independent, so can set right away */
+    if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_IO_MODE_NAME, &actual_io_mode) < 0)
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual io mode property")
+
     /* Get the sum # of chunks, if not already available */
     if(sum_chunk < 0) {
         if(H5D__mpio_get_sum_chunk(io_info, fm, &sum_chunk) < 0)
@@ -1075,6 +1073,12 @@ if(H5DEBUG(D))
  * Programmer:  Muqun Yang
  *              Monday, Feb. 13th, 2006
  *
+ * Modification:
+ *  - Set the H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME DXPL property here, to go
+ *    along with setting H5D_MPIO_ACTUAL_IO_MODE_NAME at the bottom.
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
+ *
  *-------------------------------------------------------------------------
  */
 static herr_t
@@ -1096,11 +1100,16 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
     int mpi_rank;
 #endif
     size_t u;                       /* Local index variable */
+    H5D_mpio_actual_chunk_opt_mode_t actual_chunk_opt_mode = H5D_MPIO_MULTI_CHUNK; /* actual chunk optimization mode */
     H5D_mpio_actual_io_mode_t actual_io_mode = H5D_MPIO_NO_COLLECTIVE; /* Local variable for tracking the I/O mode used. */
     herr_t ret_value = SUCCEED;

     FUNC_ENTER_STATIC

+    /* Set the actual chunk opt mode property */
+    if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME, &actual_chunk_opt_mode) < 0)
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual chunk opt mode property")
+
 #ifdef H5Dmpio_DEBUG
     mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file);
 #endif
@@ -1263,210 +1272,6 @@ done:
 } /* end H5D__multi_chunk_collective_io */

-/*-------------------------------------------------------------------------
- * Function:    H5D__multi_chunk_collective_io_no_opt
- *
- * Purpose:     To do collective IO without any optimization per chunk base
- *              The internal independent IO inside HDF5 cannot handle
- *              non-contiguous(or with holes) storage efficiently.
- *              Under this case, the one independent IO call may consist of
- *              many small disk IOs. So we may use independent IO with derived datatype
- *              to replace the independent IO when we find this chunk is not good to
- *              do collective IO. However, according to our performance study,
- *              this approach may not overcome the overhead caused by MPI gather/scatter.
- *              So we decide to leave the original collective IO per chunk approach as
- *              an option for users. NO MPI gather/scatter calls are used.
- *              HDF5 will try to collective IO if possible.
- *              If users choose to use
- *              H5Pset_dxpl_mpio_chunk_opt(dxpl_id,H5FD_MPIO_OPT_MULTI_IO),
- *              this function will be called.
- *              The HDF5 library won't do any IO management but leave it to MPI-IO to figure
- *              out.
- *
- * Return:      Non-negative on success/Negative on failure
- *
- * Programmer:  Muqun Yang
- *              Monday, Feb. 13th, 2006
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5D__multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,
-    const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, H5P_genplist_t *dx_plist)
-{
-    H5SL_node_t *chunk_node;        /* Current node in chunk skip list */
-    H5D_io_info_t ctg_io_info;      /* Contiguous I/O info object */
-    H5D_storage_t ctg_store;        /* Chunk storage information as contiguous dataset */
-    H5D_io_info_t cpt_io_info;      /* Compact I/O info object */
-    H5D_storage_t cpt_store;        /* Chunk storage information as compact dataset */
-    hbool_t cpt_dirty;              /* Temporary placeholder for compact storage "dirty" flag */
-    int min_chunk = -1;             /* Minimum # of chunks all processes will operate on */
-    int count_chunk;                /* How many chunks have we operated on? */
-    H5D_storage_t store;            /* union of EFL and chunk pointer in file space */
-    H5D_mpio_actual_io_mode_t actual_io_mode = H5D_MPIO_NO_COLLECTIVE; /*Local variable for tracking the I/O modes used. */
-    herr_t ret_value = SUCCEED;
-
-    FUNC_ENTER_STATIC
-
-#ifdef H5D_DEBUG
-if(H5DEBUG(D)) {
-    int mpi_rank;
-
-    mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file);
-    HDfprintf(H5DEBUG(D), "Rank %d: coming to multi_chunk_collective_io_no_opt\n", mpi_rank);
-}
-#endif
-
-    /* Set up contiguous I/O info object */
-    HDmemcpy(&ctg_io_info, io_info, sizeof(ctg_io_info));
-    ctg_io_info.store = &ctg_store;
-    ctg_io_info.layout_ops = *H5D_LOPS_CONTIG;
-
-    /* Initialize temporary contiguous storage info */
-    ctg_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size;
-
-    /* Set up compact I/O info object */
-    HDmemcpy(&cpt_io_info, io_info, sizeof(cpt_io_info));
-    cpt_io_info.store = &cpt_store;
-    cpt_io_info.layout_ops = *H5D_LOPS_COMPACT;
-
-    /* Initialize temporary compact storage info */
-    cpt_store.compact.dirty = &cpt_dirty;
-
-    /* Set dataset storage for I/O info */
-    io_info->store = &store;
-
-    /* Get the min. # of chunks */
-    if(H5D__mpio_get_min_chunk(io_info, fm, &min_chunk) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk")
-    HDassert(min_chunk >= 0);
-
-    /* Get first node in chunk skip list */
-    chunk_node = H5SL_first(fm->sel_chunks);
-    count_chunk = 0;
-
-    /* Iterate through chunks to be operated on */
-    while(chunk_node) {
-        H5D_chunk_info_t *chunk_info;   /* chunk information */
-        H5D_chunk_ud_t udata;           /* B-tree pass-through */
-        hbool_t make_ind, make_coll;    /* Flags to indicate that the MPI mode should change */
-
-        /* Get the actual chunk information from the skip list node */
-        chunk_info = H5SL_item(chunk_node);
-
-        /* Pass in chunk's coordinates in a union. */
-        store.chunk.offset = chunk_info->coords;
-        store.chunk.index = chunk_info->index;
-
-        /* Reset flags for changing parallel I/O mode */
-        make_ind = make_coll = FALSE;
-
-        count_chunk++;
-
-        /* If the number of chunk is greater than minimum number of chunk,
-         * Do independent read.
-         */
-        if(count_chunk > min_chunk)
-            /* Switch to independent I/O (permanently) */
-            make_ind = TRUE;
-
-        /* Retrieve the chunk's address */
-        if(H5D__chunk_lookup(io_info->dset, io_info->dxpl_id, chunk_info->coords,
-                chunk_info->index, &udata) < 0)
-            HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "couldn't get chunk info from skipped list")
-
-        /* Independent I/O */
-        if(make_ind) {
-            void *chunk;                    /* Pointer to the data chunk in cache */
-            H5D_io_info_t *chk_io_info;     /* Pointer to I/O info object for this chunk */
-            uint32_t accessed_bytes = 0;    /* Total accessed size in a chunk */
-            htri_t cacheable;               /* Whether the chunk is cacheable */
-
-            /* Switch to independent I/O */
-            if(H5D__ioinfo_xfer_mode(io_info, dx_plist, H5FD_MPIO_INDEPENDENT) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
-
-            /* Update the local variable tracking the dxpl's actual io mode */
-            actual_io_mode = actual_io_mode | H5D_MPIO_CHUNK_INDEPENDENT;
-
-            /* Load the chunk into cache and lock it. */
-            if((cacheable = H5D__chunk_cacheable(io_info, udata.addr,
-                    io_info->op_type == H5D_IO_OP_WRITE)) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable")
-            if(cacheable) {
-                hbool_t entire_chunk = TRUE;    /* Whether whole chunk is selected */
-
-                /* Compute # of bytes accessed in chunk */
-                accessed_bytes = chunk_info->chunk_points * type_info->src_type_size;
-
-                /* Determine if we will access all the data in the chunk */
-                if(((io_info->op_type == H5D_IO_OP_WRITE) && (accessed_bytes != ctg_store.contig.dset_size))
-                        || (io_info->op_type != H5D_IO_OP_WRITE))
-                    entire_chunk = FALSE;
-
-                /* Lock the chunk into the cache */
-                if(NULL == (chunk = H5D__chunk_lock(io_info, &udata, entire_chunk)))
-                    HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk")
-
-                /* Set up the storage buffer information for this chunk */
-                cpt_store.compact.buf = chunk;
-
-                /* Point I/O info at contiguous I/O info for this chunk */
-                chk_io_info = &cpt_io_info;
-            } /* end if */
-            else {
-                /* Set up the storage address information for this chunk */
-                ctg_store.contig.dset_addr = udata.addr;
-
-                /* No chunk cached */
-                chunk = NULL;
-
-                /* Point I/O info at temporary I/O info for this chunk */
-                chk_io_info = &ctg_io_info;
-            } /* end else */
-
-            if(io_info->op_type == H5D_IO_OP_WRITE) {
-                if((io_info->io_ops.single_write)(chk_io_info, type_info,
-                        (hsize_t)chunk_info->chunk_points, chunk_info->fspace, chunk_info->mspace) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
-            } /* end if */
-            else {
-                if((io_info->io_ops.single_read)(chk_io_info, type_info,
-                        (hsize_t)chunk_info->chunk_points, chunk_info->fspace, chunk_info->mspace) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
-            } /* end ese */
-
-            /* Release the cache lock on the chunk. */
-            if(chunk)
-                if(H5D__chunk_unlock(io_info, &udata, (io_info->op_type == H5D_IO_OP_WRITE), chunk, accessed_bytes) < 0)
-                    HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk")
-        } /* end if */
-        else { /*collective I/O */
-            /* Set up the storage address information for this chunk */
-            ctg_store.contig.dset_addr = udata.addr;
-
-            /* Update the local variable tracking the dxpl's actual io Mode. */
-            actual_io_mode = actual_io_mode | H5D_MPIO_CHUNK_COLLECTIVE;
-
-            if(H5D__inter_collective_io(&ctg_io_info, type_info, chunk_info->fspace, chunk_info->mspace) < 0)
-                HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,"couldn't finish shared collective MPI-IO")
-        } /* end else */
-
-        if(make_coll)
-            if(H5D__ioinfo_xfer_mode(io_info, dx_plist, H5FD_MPIO_COLLECTIVE) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
-
-        /* Get the next chunk node in the skip list */
-        chunk_node = H5SL_next(chunk_node);
-    } /* end while */
-
-    /* Write the local value of actual io mode to the DXPL. */
-    if(H5P_set(dx_plist, H5D_MPIO_ACTUAL_IO_MODE_NAME, &actual_io_mode) < 0)
-        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set actual io mode property")
-
-done:
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__multi_chunk_collective_io_no_opt */

 /*-------------------------------------------------------------------------
@@ -1794,7 +1599,6 @@ H5D__obtain_mpio_mode(H5D_io_info_t* io_info, H5D_chunk_map_t *fm,
 {
     int total_chunks;
     unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk;
-    H5FD_mpio_chunk_opt_t chunk_opt_mode;
     uint8_t* io_mode_info = NULL;
     uint8_t* recv_io_mode_info = NULL;
     uint8_t* mergebuf = NULL;
@@ -1827,8 +1631,8 @@ H5D__obtain_mpio_mode(H5D_io_info_t* io_info, H5D_chunk_map_t *fm,
     /* Setup parameters */
     H5_ASSIGN_OVERFLOW(total_chunks, fm->layout->u.chunk.nchunks, hsize_t, int);
     percent_nproc_per_chunk = H5P_peek_unsigned(dx_plist, H5D_XFER_MPIO_CHUNK_OPT_RATIO_NAME);
-    chunk_opt_mode = (H5FD_mpio_chunk_opt_t)H5P_peek_unsigned(dx_plist, H5D_XFER_MPIO_CHUNK_OPT_HARD_NAME);
-    if((chunk_opt_mode == H5FD_MPIO_CHUNK_MULTI_IO) || (percent_nproc_per_chunk == 0)) {
+    /* if ratio is 0, perform collective io */
+    if(0 == percent_nproc_per_chunk) {
         if(H5D__chunk_addrmap(io_info, chunk_addr) < 0)
             HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address");
         for(ic = 0; ic < total_chunks; ic++)
diff --git a/src/H5Dprivate.h b/src/H5Dprivate.h
index 02c2138..5acf43f 100644
--- a/src/H5Dprivate.h
+++ b/src/H5Dprivate.h
@@ -87,8 +87,6 @@
 #define H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME "coll_chunk_link_false"
 #define H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME "coll_chunk_multi_coll"
 #define H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME "coll_chunk_multi_ind"
-#define H5D_XFER_COLL_CHUNK_LINK_TO_MULTI "coll_chunk_link_mul"/* Internal transferring from link to multiple chunk */
-#define H5D_XFER_COLL_CHUNK_LINK_TO_MULTI_OPT "coll_chunk_link_mul_opt"/* Internal transferring from link opt to multiple chunk opt*/

 /* Definitions for all collective chunk instrumentation properties */
 #define H5D_XFER_COLL_CHUNK_SIZE        sizeof(unsigned)
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 507bbb6..6b17e0b 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -131,8 +131,7 @@ typedef enum H5D_mpio_actual_chunk_opt_mode_t {
      */
    H5D_MPIO_NO_CHUNK_OPTIMIZATION = 0,
     H5D_MPIO_LINK_CHUNK,
-    H5D_MPIO_MULTI_CHUNK,
-    H5D_MPIO_MULTI_CHUNK_NO_OPT
+    H5D_MPIO_MULTI_CHUNK
 } H5D_mpio_actual_chunk_opt_mode_t;

 typedef enum H5D_mpio_actual_io_mode_t {
diff --git a/src/H5trace.c b/src/H5trace.c
index 2dab8ec..92736d4 100644
--- a/src/H5trace.c
+++ b/src/H5trace.c
@@ -612,10 +612,6 @@ H5_trace(const double *returning, const char *func, const char *type, ...)
                             fprintf(out, "H5D_MPIO_MULTI_CHUNK");
                             break;

-                        case H5D_MPIO_MULTI_CHUNK_NO_OPT:
-                            fprintf(out, "H5D_MPIO_MULTI_CHUNK_NO_OPT");
-                            break;
-
                         default:
                             fprintf(out, "%ld", (long)chunk_opt_mode);
                             break;
diff --git a/testpar/t_coll_chunk.c b/testpar/t_coll_chunk.c
index 61e7bfd..73e7f09 100644
--- a/testpar/t_coll_chunk.c
+++ b/testpar/t_coll_chunk.c
@@ -258,8 +258,10 @@ coll_chunk5(void)
 /*-------------------------------------------------------------------------
  * Function:    coll_chunk6
  *
- * Purpose:     Wrapper to test the collective chunk IO for regular JOINT
-                selection with at least number of 2*mpi_size chunks
+ * Purpose:     Test direct request for multi-chunk-io.
+ *              Wrapper to test the collective chunk IO for regular JOINT
+ *              selection with at least number of 2*mpi_size chunks
+ *              Tests the direct request path to multi-chunk I/O.
  *
  * Return:      Success:        0
  *
@@ -489,6 +491,12 @@ coll_chunk10(void)
  *
  *              Failure:        -1
  *
+ * Modifications:
+ *   Remove invalid temporary property checks for the API_LINK_HARD and
+ *   API_LINK_TRUE cases.
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
+ *
  * Programmer:  Unknown
  *              July 12th, 2004
  *
@@ -634,11 +642,6 @@ coll_chunktest(const char* filename,
                        NULL, NULL, NULL, NULL, NULL, NULL);
         VRFY((status >= 0),"testing property list inserted succeeded");

-        prop_value = H5D_XFER_COLL_CHUNK_FIX;
-        status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_TO_MULTI, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
-                            NULL, NULL, NULL, NULL, NULL, NULL);
-        VRFY((status >= 0),"testing property list inserted succeeded");
-
         break;

     case API_MULTI_HARD:
@@ -654,11 +657,6 @@ coll_chunktest(const char* filename,
                        NULL, NULL, NULL, NULL, NULL, NULL);
         VRFY((status >= 0),"testing property list inserted succeeded");

-        prop_value = H5D_XFER_COLL_CHUNK_FIX;
-        status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_TO_MULTI_OPT, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
-                            NULL, NULL, NULL, NULL, NULL, NULL);
-        VRFY((status >= 0),"testing property list inserted succeeded");
-
         break;

     case API_LINK_FALSE:
@@ -699,25 +697,17 @@ coll_chunktest(const char* filename,
     case API_LINK_HARD:
         status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_HARD_NAME,&prop_value);
         VRFY((status >= 0),"testing property list get succeeded");
-        if(prop_value !=0){/*double check if the option is switched to multiple chunk internally.*/
-            status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_TO_MULTI, &prop_value);
-            VRFY((status >= 0),"testing property list get succeeded");
-            VRFY((prop_value == 1),"API to set LINK COLLECTIVE IO without optimization succeeded");
-        }
+        VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO directly succeeded");
         break;

     case API_MULTI_HARD:
         status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME,&prop_value);
         VRFY((status >= 0),"testing property list get succeeded");
-        VRFY((prop_value == 0),"API to set MULTI-CHUNK COLLECTIVE IO without optimization succeeded");
+        VRFY((prop_value == 0),"API to set MULTI-CHUNK COLLECTIVE IO optimization succeeded");
         break;

     case API_LINK_TRUE:
         status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME,&prop_value);
         VRFY((status >= 0),"testing property list get succeeded");
-        if(prop_value !=0){/*double check if the option is switched to multiple chunk internally.*/
-            status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_TO_MULTI_OPT, &prop_value);
-            VRFY((status >= 0),"testing property list get succeeded");
-            VRFY((prop_value == 1),"API to set LINK COLLECTIVE IO without optimization succeeded");
-        }
+        VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO succeeded");
         break;

     case API_LINK_FALSE:
         status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME,&prop_value);
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 22eefbc..79a5555 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2523,12 +2523,12 @@ none_selection_chunk(void)
  *       H5D_mpi_chunk_collective_io, processes disagree. The root reports
  *       collective, the rest report independent I/O
  *
- *     TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_COL:
- *       H5D_mpi_chunk_collective_io_no_opt, each process reports collective I/O
- *
- *     TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX_DISAGREE:
- *       H5D_mpi_chunk_collective_io_no_opt, processes disagree
- *       (collective and mixed I/O)
+ *     TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_IND:
+ *       Same test as TEST_ACTUAL_IO_MULTI_CHUNK_IND, but requests
+ *       multi-chunk-io directly, bypassing the num-threshold calculation.
+ *     TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_COL:
+ *       Same test as TEST_ACTUAL_IO_MULTI_CHUNK_COL, but requests
+ *       multi-chunk-io directly, bypassing the num-threshold calculation.
 *
  *     TEST_ACTUAL_IO_LINK_CHUNK:
  *       H5D_link_chunk_collective_io, processes report linked chunk I/O
@@ -2547,10 +2547,17 @@ none_selection_chunk(void)
  *       (The most complex case that works on all builds) and then performs
  *       an independent read and write with the same dxpls.
  *
- *       It may seem like TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_IND and
- *       TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX have been accidentally
- *       left out. This is intentional; the other test cases sufficiently
- *       cover all cases for Multi Chunk No Opt I/O.
+ *       Note: DIRECT_MULTI_CHUNK_MIX and DIRECT_MULTI_CHUNK_MIX_DISAGREE
+ *       are not needed, as they are covered by the MULTI_CHUNK_MIX and
+ *       MULTI_CHUNK_MIX_DISAGREE cases. The _DIRECT_ cases only test the
+ *       pathway to multi-chunk-io via H5FD_MPIO_CHUNK_MULTI_IO instead of the num-threshold.
+ *
+ * Modification:
+ *  - Refactor to remove the multi-chunk-without-optimization test and update
+ *    for testing direct to multi-chunk-io
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
+ *
 *
  * Programmer: Jacob Gruber
  * Date: 2011-04-06
@@ -2565,8 +2572,8 @@ test_actual_io_mode(int selection_mode) {
     H5D_mpio_actual_io_mode_t actual_io_mode_expected = -1;
     const char  * filename;
     const char  * test_name;
-    hbool_t     multi_chunk_no_opt;
-    hbool_t     multi_chunk_with_opt;
+    hbool_t     direct_multi_chunk_io;
+    hbool_t     multi_chunk_io;
     hbool_t     is_chunked;
     hbool_t     is_collective;
     int         mpi_size = -1;
@@ -2593,18 +2600,18 @@ test_actual_io_mode(int selection_mode) {
     hsize_t     count[RANK];
     hsize_t     block[RANK];
     hbool_t     use_gpfs = FALSE;
+    char        message[256];
     herr_t      ret;

     /* Set up some flags to make some future if statements slightly more readable */
-    multi_chunk_no_opt = (
-        selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_IND ||
-        selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_COL ||
-        selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX_DISAGREE );
+    direct_multi_chunk_io = (
+        selection_mode == TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_IND ||
+        selection_mode == TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_COL );

     /* Note: RESET performs the same tests as MULTI_CHUNK_MIX_DISAGREE and then
      * tests independent I/O
      */
-    multi_chunk_with_opt = (
+    multi_chunk_io = (
         selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_IND ||
         selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_COL ||
         selection_mode == TEST_ACTUAL_IO_MULTI_CHUNK_MIX ||
@@ -2673,6 +2680,7 @@ test_actual_io_mode(int selection_mode) {

         /* Independent I/O with optimization */
         case TEST_ACTUAL_IO_MULTI_CHUNK_IND:
+        case TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_IND:
             /* Since the dataset is chunked by row and each process selects a row,
              * each process writes to a different chunk. This forces all I/O to be
              * independent.
@@ -2686,6 +2694,7 @@ test_actual_io_mode(int selection_mode) {

         /* Collective I/O with optimization */
         case TEST_ACTUAL_IO_MULTI_CHUNK_COL:
+        case TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_COL:
             /* The dataset is chunked by rows, so each process takes a column which
              * spans all chunks. Since the processes write non-overlapping regular
              * selections to each chunk, the operation is purely collective.
@@ -2779,39 +2788,6 @@ test_actual_io_mode(int selection_mode) {

             break;

-        /* Collective I/O without optimization */
-        case TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_COL:
-            /* The dataset is chunked by rows, so when each process takes a column, its
-             * selection spans all chunks. Since no process writes more chunks than any
-             * other, colective I/O is never broken. */
-            slab_set(mpi_rank, mpi_size, start, count, stride, block, BYCOL);
-
-            test_name = "Multi Chunk No Opt - Collective";
-            actual_chunk_opt_mode_expected = H5D_MPIO_MULTI_CHUNK_NO_OPT;
-            actual_io_mode_expected = H5D_MPIO_CHUNK_COLLECTIVE;
-            break;
-
-
-        /* Mixed I/O without optimization with disagreement */
-        case TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX_DISAGREE:
-            /* Each process takes a column, but the root's column is shortened so that
-             * it only reads the first chunk. Since all the other processes are writing
-             * to more chunks, they will break collective after the first chunk.
-             */
-            slab_set(mpi_rank, mpi_size, start, count, stride, block, BYCOL);
-            if(mpi_rank == 0)
-                block[0] = block[0] / mpi_size;
-
-            test_name = "Multi Chunk No Opt - Mixed (Disagreement)";
-            actual_chunk_opt_mode_expected = H5D_MPIO_MULTI_CHUNK_NO_OPT;
-
-            if(mpi_rank == 0)
-                actual_io_mode_expected = H5D_MPIO_CHUNK_COLLECTIVE;
-            else
-                actual_io_mode_expected = H5D_MPIO_CHUNK_MIXED;
-
-            break;
-
         /* Linked Chunk I/O */
         case TEST_ACTUAL_IO_LINK_CHUNK:
             /* Nothing special; link chunk I/O is forced in the dxpl settings. */
@@ -2887,20 +2863,25 @@ test_actual_io_mode(int selection_mode) {
     ret = H5Pset_dxpl_mpio(dxpl_write, H5FD_MPIO_COLLECTIVE);
     VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");

-    /* Set the threshold number of processes per chunk for link chunk I/O
-     * to twice mpi_size. This will prevent the threshold from ever being
-     * met, thus forcing multi chunk io instead of link chunk io.
+    /* Set the threshold number of processes per chunk to twice mpi_size.
+     * This will prevent the threshold from ever being met, thus forcing
+     * multi chunk io instead of link chunk io.
+     * This is the default path.
      */
-    if(multi_chunk_with_opt) {
+    if(multi_chunk_io) {
+        /* force multi-chunk-io by threshold */
         ret = H5Pset_dxpl_mpio_chunk_opt_num(dxpl_write, (unsigned) mpi_size*2);
         VRFY((ret >= 0), "H5Pset_dxpl_mpio_chunk_opt_num succeeded");

+        /* set this to manipulate the testing scenario for allocating
+         * processes to chunks */
         ret = H5Pset_dxpl_mpio_chunk_opt_ratio(dxpl_write, (unsigned) 99);
         VRFY((ret >= 0), "H5Pset_dxpl_mpio_chunk_opt_ratio succeeded");
     }

-    /* Request multi chunk I/O without optimization */
-    if(multi_chunk_no_opt) {
+    /* Go directly to multi-chunk-io, without the threshold calculation.
+     */
+    if(direct_multi_chunk_io) {
+        /* request multi-chunk-io via the property */
         ret = H5Pset_dxpl_mpio_chunk_opt(dxpl_write, H5FD_MPIO_CHUNK_MULTI_IO);
         VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
     }
@@ -2943,7 +2924,6 @@ test_actual_io_mode(int selection_mode) {

     /* Test values */
     if(actual_chunk_opt_mode_expected != (unsigned) -1 && actual_io_mode_expected != (unsigned) -1) {
-        char message[100];
         sprintf(message, "Actual Chunk Opt Mode has the correct value for %s.\n",test_name);
         VRFY((actual_chunk_opt_mode_write == actual_chunk_opt_mode_expected), message);
         sprintf(message, "Actual IO Mode has the correct value for %s.\n",test_name);
@@ -3027,6 +3007,9 @@ actual_io_mode_tests(void) {

     test_actual_io_mode(TEST_ACTUAL_IO_NO_COLLECTIVE);

+    /*
+     * Test multi-chunk-io via proc_num threshold
+     */
     test_actual_io_mode(TEST_ACTUAL_IO_MULTI_CHUNK_IND);
     test_actual_io_mode(TEST_ACTUAL_IO_MULTI_CHUNK_COL);

@@ -3038,8 +3021,11 @@ actual_io_mode_tests(void) {

     test_actual_io_mode(TEST_ACTUAL_IO_MULTI_CHUNK_MIX_DISAGREE);

-    test_actual_io_mode(TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_COL);
-    test_actual_io_mode(TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX_DISAGREE);
+    /*
+     * Test multi-chunk-io via setting the direct property
+     */
+    test_actual_io_mode(TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_IND);
+    test_actual_io_mode(TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_COL);

     test_actual_io_mode(TEST_ACTUAL_IO_LINK_CHUNK);
     test_actual_io_mode(TEST_ACTUAL_IO_CONTIGUOUS);
diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c
index 97a377e..784892a 100644
--- a/testpar/testphdf5.c
+++ b/testpar/testphdf5.c
@@ -440,7 +440,7 @@ int main(int argc, char **argv)
                 "linked chunk collective IO without optimization",PARATESTFILE);
     AddTest((mpi_size < 3)? "-cchunk6" : "cchunk6",
         coll_chunk6,NULL,
-        "multi-chunk collective IO without optimization",PARATESTFILE);
+        "multi-chunk collective IO with direct request",PARATESTFILE);
     AddTest((mpi_size < 3)? "-cchunk7" : "cchunk7",
         coll_chunk7,NULL,
         "linked chunk collective IO with optimization",PARATESTFILE);
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index 2219dc9..fa83697 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -169,11 +169,10 @@ enum H5TEST_COLL_CHUNK_API {API_NONE=0,API_LINK_HARD,
 #define TEST_ACTUAL_IO_MULTI_CHUNK_COL 3
 #define TEST_ACTUAL_IO_MULTI_CHUNK_MIX 4
 #define TEST_ACTUAL_IO_MULTI_CHUNK_MIX_DISAGREE 5
-#define TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_IND 6
-#define TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_COL 7
-#define TEST_ACTUAL_IO_MULTI_CHUNK_NO_OPT_MIX_DISAGREE 8
-#define TEST_ACTUAL_IO_LINK_CHUNK 9
-#define TEST_ACTUAL_IO_CONTIGUOUS 10
+#define TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_IND 6
+#define TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_COL 7
+#define TEST_ACTUAL_IO_LINK_CHUNK 8
+#define TEST_ACTUAL_IO_CONTIGUOUS 9

 /* Definitions of the selection mode for the no_collective_cause_tests function. */
 #define TEST_COLLECTIVE 0x001
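For context, a minimal end-to-end sketch (not part of the patch) of the user-visible behavior this commit settles on: an application requests multi-chunk collective I/O directly and then verifies the optimization mode the library reports. With this change a direct request is expected to report H5D_MPIO_MULTI_CHUNK, since H5D_MPIO_MULTI_CHUNK_NO_OPT no longer exists. The file and dataset names are hypothetical and error checks are omitted for brevity.

#include <mpi.h>
#include <hdf5.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* Open an existing chunked dataset through the MPI-IO driver */
    hid_t fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
    hid_t file = H5Fopen("chunked.h5", H5F_ACC_RDONLY, fapl);  /* hypothetical file */
    hid_t dset = H5Dopen2(file, "/dset", H5P_DEFAULT);         /* hypothetical dataset */

    /* Collective transfer, with a direct multi-chunk-io request */
    hid_t dxpl = H5Pcreate(H5P_DATASET_XFER);
    H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
    H5Pset_dxpl_mpio_chunk_opt(dxpl, H5FD_MPIO_CHUNK_MULTI_IO);

    /* Read the whole dataset so the dxpl records the actual modes used */
    hid_t fspace = H5Dget_space(dset);
    hssize_t npoints = H5Sget_simple_extent_npoints(fspace);
    int *buf = (int *)malloc((size_t)npoints * sizeof(int));
    H5Dread(dset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl, buf);

    /* Query the modes the library actually used for this transfer */
    H5D_mpio_actual_chunk_opt_mode_t opt_mode;
    H5D_mpio_actual_io_mode_t io_mode;
    H5Pget_mpio_actual_chunk_opt_mode(dxpl, &opt_mode);
    H5Pget_mpio_actual_io_mode(dxpl, &io_mode);

    if(H5D_MPIO_MULTI_CHUNK == opt_mode)
        printf("multi-chunk optimization used, actual io mode = %d\n", (int)io_mode);

    free(buf);
    H5Sclose(fspace);
    H5Pclose(dxpl);
    H5Dclose(dset);
    H5Fclose(file);
    H5Pclose(fapl);
    MPI_Finalize();
    return 0;
}

This mirrors what testpar/t_dset.c does in the TEST_ACTUAL_IO_DIRECT_MULTI_CHUNK_* cases, except that the test also exercises the threshold-based path via H5Pset_dxpl_mpio_chunk_opt_num() and H5Pset_dxpl_mpio_chunk_opt_ratio().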