From 45f1eba2132c179e92d7144c6ff8713573ad24fc Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Sat, 13 Aug 2005 13:09:57 -0500 Subject: [svn-r11241] Purpose: Code cleanup Description: Fix logic error in previous checkin and also finish refactoring I/O initialization, including simplifying all the collective & parallel cases into a more unified mechanism. Platforms tested: FreeBSD 4.11 (sleipnir) w/ & w/o parallel Linux 2.4 (mir) --- src/H5Dio.c | 447 +++++++++++++++------------------------------------------- src/H5Dmpio.c | 82 ++++++++--- src/H5Dpkg.h | 5 +- 3 files changed, 175 insertions(+), 359 deletions(-) diff --git a/src/H5Dio.c b/src/H5Dio.c index 6956c23..df9e1b7 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -102,8 +102,6 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, static herr_t H5D_ioinfo_make_ind(H5D_io_info_t *io_info); static herr_t H5D_ioinfo_make_coll(H5D_io_info_t *io_info); static herr_t H5D_ioinfo_term(H5D_io_info_t *io_info); -static htri_t H5D_get_collective_io_consensus(const H5F_t *file, - const htri_t local_opinion, const unsigned flags); static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info, const fm_map *fm, int *min_chunkf); #endif /* H5_HAVE_PARALLEL */ @@ -111,7 +109,7 @@ static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info, /* I/O info operations */ static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, const H5S_t *mem_space, const H5S_t *file_space, - H5T_path_t *tpath, unsigned flags, H5D_io_info_t *io_info); + H5T_path_t *tpath, H5D_io_info_t *io_info); /* Chunk operations */ static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type, @@ -644,19 +642,11 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, H5T_path_t *tpath = NULL; /*type conversion info */ const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ -#ifdef H5_HAVE_PARALLEL -#ifdef H5_HAVE_INSTRUMENTED_LIBRARY - int prop_value,new_value; - htri_t check_prop; -#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ -#endif /*H5_HAVE_PARALLEL*/ + hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */ H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */ H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */ - unsigned sconv_flags=0; /* Flags for the space conversion */ herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT(H5D_read) /* check args */ @@ -682,11 +672,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, /* Collective access is not permissible without a MPI based VFD */ if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file)) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only") - - /* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */ - /* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */ - if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file)) - sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE; #endif /*H5_HAVE_PARALLEL*/ /* Make certain that the number of elements in each selection is the same */ @@ -744,51 +729,10 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, if (NULL==(tpath=H5T_path_find(dataset->shared->type, mem_type, NULL, NULL, dxpl_id))) HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types") - /* Set the storage flags for the space conversion check */ - switch(dataset->shared->layout.type) { - case H5D_COMPACT: - sconv_flags |= H5S_CONV_STORAGE_COMPACT; - break; - - case H5D_CONTIGUOUS: - sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS; - break; - - case H5D_CHUNKED: - sconv_flags |= H5S_CONV_STORAGE_CHUNKED; - break; - - default: - assert(0 && "Unhandled layout type!"); - } /* end switch */ - /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") - -#ifdef H5_HAVE_PARALLEL -#ifdef H5_HAVE_INSTRUMENTED_LIBRARY - /**** Test for collective chunk IO - notice the following code should be removed after - a more general collective chunk IO algorithm is applied. - */ - if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */ - check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME); - if(check_prop < 0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list"); - if(check_prop > 0) { - if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value"); - if(!io_info.use_par_opt_io) { - new_value = 0; - if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value"); - } - } - } - /* end Test for collective chunk IO */ -#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ -#endif /*H5_HAVE_PARALLEL*/ + io_info_init = TRUE; /* Determine correct I/O routine to invoke */ if(dataset->shared->layout.type!=H5D_CHUNKED) { @@ -805,8 +749,9 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, done: #ifdef H5_HAVE_PARALLEL /* Shut down io_info struct */ - if (H5D_ioinfo_term(&io_info) < 0) - HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info") + if (io_info_init) + if(H5D_ioinfo_term(&io_info) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info") #endif /*H5_HAVE_PARALLEL*/ FUNC_LEAVE_NOAPI(ret_value) @@ -868,15 +813,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, H5T_path_t *tpath = NULL; /*type conversion info */ const H5T_t *mem_type = NULL; /* Memory datatype */ H5D_io_info_t io_info; /* Dataset I/O info */ -#ifdef H5_HAVE_PARALLEL -#ifdef H5_HAVE_INSTRUMENTED_LIBRARY - int prop_value,new_value; - htri_t check_prop; -#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ -#endif /*H5_HAVE_PARALLEL*/ + hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */ H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */ H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */ - unsigned sconv_flags=0; /* Flags for the space conversion */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI_NOINIT(H5D_write) @@ -897,22 +836,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, dataset->shared->checked_filters = TRUE; } - /* If MPI based VFD is used, no VL datatype support yet. */ - /* This is because they use the global heap in the file and we don't */ - /* support parallel access of that yet */ - if (IS_H5FD_MPI(dataset->ent.file) && H5T_detect_class(mem_type, H5T_VLEN)>0) - HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet") - /* If MPI based VFD is used, no VL datatype support yet. */ - /* This is because they use the global heap in the file and we don't */ - /* support parallel access of that yet */ - /* We should really use H5T_detect_class() here, but it will be difficult - * to detect the type of the reference if it is nested... -QAK - */ - if (IS_H5FD_MPI(dataset->ent.file) && - H5T_get_class(mem_type, TRUE)==H5T_REFERENCE && - H5T_get_ref_type(mem_type)==H5R_DATASET_REGION) - HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet") - /* Check if we are allowed to write to this file */ if (0==(H5F_get_intent(dataset->ent.file) & H5F_ACC_RDWR)) HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "no write intent on file") @@ -921,6 +844,32 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, if (H5D_get_dxpl_cache(dxpl_id,&dxpl_cache)<0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache") + /* Various MPI based checks */ + if (IS_H5FD_MPI(dataset->ent.file)) { + /* If MPI based VFD is used, no VL datatype support yet. */ + /* This is because they use the global heap in the file and we don't */ + /* support parallel access of that yet */ + if(H5T_detect_class(mem_type, H5T_VLEN)>0) + HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet") + + /* If MPI based VFD is used, no VL datatype support yet. */ + /* This is because they use the global heap in the file and we don't */ + /* support parallel access of that yet */ + /* We should really use H5T_detect_class() here, but it will be difficult + * to detect the type of the reference if it is nested... -QAK + */ + if (H5T_get_class(mem_type, TRUE)==H5T_REFERENCE && + H5T_get_ref_type(mem_type)==H5R_DATASET_REGION) + HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet") + } /* end if */ +#ifdef H5_HAVE_PARALLEL + else { + /* Collective access is not permissible without a MPI based VFD */ + if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) + HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only") + } /* end else */ +#endif /*H5_HAVE_PARALLEL*/ + if (!file_space) file_space = dataset->shared->space; if (!mem_space) @@ -929,17 +878,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src dataspace has invalid selection") H5_ASSIGN_OVERFLOW(nelmts,snelmts,hssize_t,hsize_t); -#ifdef H5_HAVE_PARALLEL - /* Collective access is not permissible without a MPI based VFD */ - if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file)) - HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only") - - /* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */ - /* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */ - if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file)) - sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE; -#endif /*H5_HAVE_PARALLEL*/ - /* Make certain that the number of elements in each selection is the same */ if (nelmts!=(hsize_t)H5S_GET_SELECT_NPOINTS(file_space)) HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src and dest data spaces have different sizes") @@ -986,51 +924,10 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, if (NULL==(tpath=H5T_path_find(mem_type, dataset->shared->type, NULL, NULL, dxpl_id))) HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types") - /* Set the storage flags for the space conversion check */ - switch(dataset->shared->layout.type) { - case H5D_COMPACT: - sconv_flags |= H5S_CONV_STORAGE_COMPACT; - break; - - case H5D_CONTIGUOUS: - sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS; - break; - - case H5D_CHUNKED: - sconv_flags |= H5S_CONV_STORAGE_CHUNKED; - break; - - default: - assert(0 && "Unhandled layout type!"); - } /* end switch */ - /* Set up I/O operation */ - if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0) + if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation") - -#ifdef H5_HAVE_PARALLEL -#ifdef H5_HAVE_INSTRUMENTED_LIBRARY - /**** Test for collective chunk IO - notice the following code should be removed after - a more general collective chunk IO algorithm is applied. - */ - if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */ - check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME); - if(check_prop < 0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list"); - if(check_prop > 0) { - if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value"); - if(!io_info.use_par_opt_io) { - new_value = 0; - if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0) - HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value"); - } - } - } - /* end Test for collective chunk IO */ -#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ -#endif /*H5_HAVE_PARALLEL*/ + io_info_init = TRUE; /* Determine correct I/O routine to invoke */ if(dataset->shared->layout.type!=H5D_CHUNKED) { @@ -1062,8 +959,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space, done: #ifdef H5_HAVE_PARALLEL /* Shut down io_info struct */ - if (H5D_ioinfo_term(&io_info) < 0) - HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info") + if (io_info_init) + if(H5D_ioinfo_term(&io_info) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info") #endif /*H5_HAVE_PARALLEL*/ FUNC_LEAVE_NOAPI(ret_value) @@ -1711,32 +1609,25 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, int mpi_code; /* MPI error code */ int all_regular = 0; /* If this chunk's selections are regular on all processes */ + /* Determine if this process has regular selections */ if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) is_regular = 1; else is_regular = 0; - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + /* Determine if all processes have regular selections */ + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - if(!is_regular) { + /* For regular selection, + if MPI_COMPLEX_DERIVED_DATATYPE is not defined, + unless spaces for all processors are regular, independent read operation should be performed.*/ + if(!all_regular) { /* Switch to independent I/O (temporarily) */ make_ind = TRUE; make_coll = TRUE; - } - else { - /* For regular selection, - if MPI_COMPLEX_DERIVED_DATATYPE is not defined, - unless spaces for all processors are regular, independent read operation should be performed.*/ - if(!all_regular) { - /* Switch to independent I/O (temporarily) */ - make_ind = TRUE; - make_coll = TRUE; - } /* end if */ - } /* end else */ + } /* end if */ } /* end else */ #endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */ } /* end if */ @@ -2104,32 +1995,25 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, int mpi_code; /* MPI error code */ int all_regular = 0; /* If this chunk's selections are regular on all processes */ + /* Determine if this process has regular selections */ if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE && H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE) - is_regular = 1; + is_regular = 1; else is_regular = 0; - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + /* Determine if all processes have regular selections */ + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - if(!is_regular) { + /* For regular selection, + if MPI_COMPLEX_DERIVED_DATATYPE is not defined, + unless spaces for all processors are regular, independent read operation should be performed.*/ + if(!all_regular) { /* Switch to independent I/O (temporarily) */ make_ind = TRUE; make_coll = TRUE; - } - else { - /* For regular selection, - if MPI_COMPLEX_DERIVED_DATATYPE is not defined, - unless spaces for all processors are regular, independent read operation should be performed.*/ - if(!all_regular) { - /* Switch to independent I/O (temporarily) */ - make_ind = TRUE; - make_coll = TRUE; - } /* end if */ - } /* end else */ + } /* end if */ } /* end else */ #endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */ } /* end if */ @@ -3203,103 +3087,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5D_get_collective_io_consensus - * - * Purpose: Compare notes with all other processes involved in this I/O - * and see if all are go for collective I/O. - * - * If all are, return TRUE. - * - * If any process can't manage collective I/O, then collective - * I/O is impossible, and we return FALSE. - * - * If the flags indicate that collective I/O is impossible, - * skip the interprocess communication and just return FALSE. - * - * In any error is detected, return FAIL. - * - * Return: Success: TRUE or FALSE - * - * Failure: FAIL - * - * Programmer: JRM -- 8/30/04 - * - * Modifications: - * - * None. - * - *------------------------------------------------------------------------- - */ - -#ifdef H5_HAVE_PARALLEL -static htri_t -H5D_get_collective_io_consensus(const H5F_t *file, - const htri_t local_opinion, - const unsigned flags) -{ - htri_t ret_value = FAIL; /* will update if successful */ - MPI_Comm comm; - int int_local_opinion; - int consensus; - int mpi_result; - - FUNC_ENTER_NOAPI_NOINIT(H5D_get_collective_io_consensus); - - HDassert ( ( local_opinion == TRUE ) || ( local_opinion == FALSE ) ); - - /* Don't do the interprocess communication unless the Parallel I/O - * conversion flag is set -- there may not be other processes to - * talk to. - */ - if ( ! ( flags & flags&H5S_CONV_PAR_IO_POSSIBLE ) ) { - - HGOTO_DONE(FALSE); - } - - comm = H5F_mpi_get_comm(file); - - if ( comm == MPI_COMM_NULL ) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, \ - "can't retrieve MPI communicator") - - if ( local_opinion == TRUE ) { - - int_local_opinion = 1; - - } else { - - int_local_opinion = 0; - } - - mpi_result = MPI_Allreduce((void *)(&int_local_opinion), - (void *)(&consensus), - 1, - MPI_INT, - MPI_LAND, - comm); - - if ( mpi_result != MPI_SUCCESS ) - HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_result) - - if ( consensus ) { - - ret_value = TRUE; - - } else { - - ret_value = FALSE; - } - -done: - - FUNC_LEAVE_NOAPI(ret_value); - -} /* H5D_get_collective_io_consensus() */ - -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5D_ioinfo_init * * Purpose: Routine for determining correct I/O operations for @@ -3328,16 +3115,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, #ifndef H5_HAVE_PARALLEL UNUSED #endif /* H5_HAVE_PARALLEL */ - *tpath, unsigned -#ifndef H5_HAVE_PARALLEL - UNUSED -#endif /* H5_HAVE_PARALLEL */ - flags, + *tpath, H5D_io_info_t *io_info) { -#ifdef H5_HAVE_PARALLEL - htri_t opt; /* Flag whether a selection is optimizable */ -#endif /* H5_HAVE_PARALLEL */ herr_t ret_value = SUCCEED; /* Return value */ #if defined H5_HAVE_PARALLEL || defined H5S_DEBUG @@ -3364,7 +3144,12 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, io_info->ops=dset->shared->io_ops; #ifdef H5_HAVE_PARALLEL + /* Start in the "not modified" xfer_mode state */ + io_info->xfer_mode_changed = FALSE; + if(IS_H5FD_MPI(dset->ent.file)) { + htri_t opt; /* Flag whether a selection is optimizable */ + /* Get MPI communicator */ if((io_info->comm = H5F_mpi_get_comm(dset->ent.file)) == MPI_COMM_NULL) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator") @@ -3372,72 +3157,69 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id, /* * Check if we can set direct MPI-IO read/write functions */ - opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags); + opt=H5D_mpio_opt_possible(io_info, mem_space, file_space, tpath); if(opt==FAIL) HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace "); - opt = H5D_get_collective_io_consensus(dset->ent.file, opt, flags); - if ( opt == FAIL ) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "check for collective I/O consensus failed."); - /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { /* Set the pointers to the MPI-specific routines */ io_info->ops.read = H5D_mpio_select_read; io_info->ops.write = H5D_mpio_select_write; - - /* Indicate that the I/O will use collective */ - io_info->use_par_opt_io=TRUE; } /* end if */ else { /* Set the pointers to the non-MPI-specific routines */ io_info->ops.read = H5D_select_read; io_info->ops.write = H5D_select_write; - /* Indicate that the I/O will _NOT_ be parallel, use independent IO */ - io_info->use_par_opt_io=FALSE; - } /* end else */ - - /* Start in the "not modified" state */ - io_info->xfer_mode_changed=FALSE; - - /* If we can't or won't be doing collective I/O, but the user - * asked for collective I/O, change the request to use independent I/O - */ - if(!(io_info->use_par_opt_io && H5T_path_noop(tpath) && - H5Z_xform_noop(dxpl_cache->data_xform_prop)) - && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) { - H5P_genplist_t *dx_plist; /* Data transer property list */ - - /* Get the dataset transfer property list */ - if (NULL == (dx_plist = H5I_object(dxpl_id))) - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list") - - /* Change the xfer_mode to independent for handling the I/O */ - io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT; - if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode") - - /* Set the pointers to the non-MPI-specific routines */ - io_info->ops.read = H5D_select_read; - io_info->ops.write = H5D_select_write; - - /* Indicate that the transfer mode should be restored before returning - * to user. + /* If we won't be doing collective I/O, but the user asked for + * collective I/O, change the request to use independent I/O, but + * mark it so that we remember to revert the change. */ - io_info->xfer_mode_changed=TRUE; - } /* end if */ + if(io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) { + H5P_genplist_t *dx_plist; /* Data transer property list */ + + /* Get the dataset transfer property list */ + if (NULL == (dx_plist = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list") + + /* Change the xfer_mode to independent for handling the I/O */ + io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT; + if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode") + + /* Indicate that the transfer mode should be restored before returning + * to user. + */ + io_info->xfer_mode_changed = TRUE; + } /* end if */ + +#ifdef H5_HAVE_INSTRUMENTED_LIBRARY + /**** Test for collective chunk IO + notice the following code should be removed after + a more general collective chunk IO algorithm is applied. + (This property is only reset for independent I/O) + */ + if(dset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */ + htri_t check_prop; + + check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME); + if(check_prop < 0) + HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list"); + if(check_prop > 0) { + int prop_value = 0; + + if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0) + HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value"); + } /* end if */ + } /* end if */ +#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ + } /* end else */ } /* end if */ else { - /* Indicate that the I/O will _NOT_ be parallel */ - io_info->use_par_opt_io=FALSE; - /* Set the pointers to the non-MPI-specific routines */ io_info->ops.read = H5D_select_read; io_info->ops.write = H5D_select_write; - - /* Set the "not modified" state */ - io_info->xfer_mode_changed=FALSE; } /* end else */ #else /* H5_HAVE_PARALLEL */ io_info->ops.read = H5D_select_read; @@ -3570,13 +3352,14 @@ done: static herr_t H5D_ioinfo_term(H5D_io_info_t *io_info) { - H5P_genplist_t *dx_plist; /* Data transer property list */ herr_t ret_value = SUCCEED; /*return value */ FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_term) /* Check if we need to revert the change to the xfer mode */ - if (io_info->use_par_opt_io && io_info->xfer_mode_changed) { + if (io_info->xfer_mode_changed) { + H5P_genplist_t *dx_plist; /* Data transer property list */ + /* Get the dataset transfer property list */ if (NULL == (dx_plist = H5I_object(io_info->dxpl_id))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list") @@ -3619,13 +3402,9 @@ H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info, /* Get the number of chunks to perform I/O on */ num_chunkf = H5SL_count(fm->fsel); - /* Determine the minimum and maximum # of chunks for all processes */ - if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&num_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code) - - /* Broadcast the flag indicating the number of chunks are the same */ - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + /* Determine the minimum # of chunks for all processes */ + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&num_chunkf, min_chunkf, 1, MPI_INT, MPI_MIN, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) done: FUNC_LEAVE_NOAPI(ret_value); diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 572aaca..479db17 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -24,7 +24,6 @@ #define H5D_PACKAGE /*suppress error about including H5Dpkg */ - #include "H5private.h" /* Generic Functions */ #include "H5Dpkg.h" /* Datasets */ #include "H5Eprivate.h" /* Error handling */ @@ -61,35 +60,51 @@ H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size, *------------------------------------------------------------------------- */ htri_t -H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags) +H5D_mpio_opt_possible( const H5D_io_info_t *io_info, + const H5S_t *mem_space, const H5S_t *file_space, const H5T_path_t *tpath) { + int local_opinion = TRUE; /* This process's idea of whether to perform collective I/O or not */ + int consensus; /* Consensus opinion of all processes */ + int mpi_code; /* MPI error code */ htri_t ret_value=TRUE; FUNC_ENTER_NOAPI(H5D_mpio_opt_possible, FAIL); /* Check args */ - assert(dset); + assert(io_info); assert(mem_space); assert(file_space); - /* Parallel I/O conversion flag must be set, if it is not collective IO, go to false. */ - if(!(flags&H5S_CONV_PAR_IO_POSSIBLE)) + /* For independent I/O, get out quickly and don't try to form consensus */ + if (io_info->dxpl_cache->xfer_mode==H5FD_MPIO_INDEPENDENT) HGOTO_DONE(FALSE); + /* Optimized MPI types flag must be set and it is must be collective IO */ + /* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */ + if (!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(io_info->dset->ent.file))) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ + /* Check whether these are both simple or scalar dataspaces */ if (!((H5S_SIMPLE==H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space)) - && (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space)))) - HGOTO_DONE(FALSE); - + && (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space)))) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ /* Can't currently handle point selections */ - if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space)) - HGOTO_DONE(FALSE); + if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space)) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ /* Dataset storage must be contiguous or chunked */ - if ((flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CONTIGUOUS && - (flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED) - HGOTO_DONE(FALSE); + if (!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS || + io_info->dset->shared->layout.type == H5D_CHUNKED)) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ /*The handling of memory space is different for chunking and contiguous storage, @@ -102,18 +117,41 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f support complicated MPI derived data type, we will set use_par_opt_io = FALSE. */ - if(dset->shared->layout.type == H5D_CONTIGUOUS) { - #ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS - if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) || - (H5S_SELECT_IS_REGULAR(mem_space) != TRUE)) - HGOTO_DONE(FALSE); + if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS) + if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) || + (H5S_SELECT_IS_REGULAR(mem_space) != TRUE)) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ #endif - } - if(dset->shared->layout.type == H5D_CHUNKED) - if(dset->shared->dcpl_cache.pline.nused>0) - HGOTO_DONE(FALSE); /* Perform the independent write operation */ + /* Don't allow collective operations if filters need to be applied */ + if(io_info->dset->shared->layout.type == H5D_CHUNKED) + if(io_info->dset->shared->dcpl_cache.pline.nused>0) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ + + /* Don't allow collective operations if datatype conversions need to happen */ + if(!H5T_path_noop(tpath)) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ + + /* Don't allow collective operations if data transform operations should occur */ + if(!H5Z_xform_noop(io_info->dxpl_cache->data_xform_prop)) { + local_opinion = FALSE; + goto broadcast; + } /* end if */ + +broadcast: + /* Form consensus opinion among all processes about whether to perform + * collective I/O */ + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_opinion, &consensus, 1, MPI_INT, MPI_LAND, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) + + ret_value = consensus > 0 ? TRUE : FALSE; done: FUNC_LEAVE_NOAPI(ret_value); diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index cf416f3..609b590 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -105,7 +105,6 @@ typedef struct H5D_io_info_t { hid_t dxpl_id; /* Original DXPL ID */ #ifdef H5_HAVE_PARALLEL MPI_Comm comm; /* MPI communicator for file */ - hbool_t use_par_opt_io; /* Whether the 'optimized' I/O routines with be parallel */ hbool_t xfer_mode_changed; /* Whether the transfer mode was changed */ #endif /* H5_HAVE_PARALLEL */ const H5D_storage_t *store; /* Dataset storage info */ @@ -324,8 +323,8 @@ H5_DLL herr_t H5D_mpio_spaces_span_write(H5D_io_info_t *io_info, /* MPI-IO function to check if a direct I/O transfer is possible between * memory and the file */ -H5_DLL htri_t H5D_mpio_opt_possible(const H5D_t *dset, const H5S_t *mem_space, - const H5S_t *file_space, const unsigned flags); +H5_DLL htri_t H5D_mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *mem_space, + const H5S_t *file_space, const H5T_path_t *tpath); #endif /* H5_HAVE_PARALLEL */ /* Testing functions */ -- cgit v0.12