From 6262a14f2e6f669f72e0212b4ce3654c9526f1dc Mon Sep 17 00:00:00 2001 From: Raymond Lu Date: Mon, 20 Aug 2007 16:55:38 -0500 Subject: [svn-r14096] There are 3 changes in this checkin, as listed below: 1. In H5Dwrite and H5Dread, let the data buffer point to a fake address if the application passes in an empty buffer. This is mainly for MPIO programs in which some processes may not have any data to write or read but still participate in the I/O. This works around a problem in some MPI implementations, such as ChaMPIon on tungsten, which don't support an empty buffer. 2. ChaMPIon on tungsten doesn't correctly support complex derived MPI data types, nor collective I/O when some processes don't have any data to write or read. Detect the compiler "cmpicc" in the system-specific config file and set the variables for these two cases to false. The PHDF5 library already has a way to switch collective chunked I/O to independent I/O in these two cases. 3. A bug fix - During the optimization work for compound data I/O, the case for switching collective chunked I/O to independent I/O was left out. Fixed it by adding I/O caching to it in H5D_multi_chunk_collective_io in H5Dmpio.c. Tested on tungsten, cobalt, and kagiso for parallel; on linew and smirom for serial. --- config/linux-gnulibc1 | 10 +++ configure | 67 ++++++++++------ configure.in | 19 +++++ src/H5Dio.c | 24 ++++++ src/H5Dmpio.c | 98 ++++++++++++++++++----- testpar/t_dset.c | 212 ++++++++++++++++++++++++++++++++++++++++++++++++++ testpar/testphdf5.c | 3 + testpar/testphdf5.h | 1 + 8 files changed, 392 insertions(+), 42 deletions(-) diff --git a/config/linux-gnulibc1 b/config/linux-gnulibc1 index 5ece750..015d079 100644 --- a/config/linux-gnulibc1 +++ b/config/linux-gnulibc1 @@ -116,6 +116,16 @@ fi # Check MPICH settings . $srcdir/config/mpich + +# The ChaMPIon on NCSA tungsten doesn't work correctly for either of the following +# cases: +# 1. collective I/O when some processes don't have any contributions; +# 2. 
complex derived MPI data type. +if test $CC_BASENAME = cmpicc; then + hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'} + hdf5_mpi_complex_derived_datatype_works=${hdf5_mpi_complex_derived_datatype_works='no'} +fi + #Comment out the following line if your system supports collective IO when some processes #don't have any contributions to IOs. #hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'} diff --git a/configure b/configure index edd71b6..adc1755 100755 --- a/configure +++ b/configure @@ -1,5 +1,5 @@ #! /bin/sh -# From configure.in Id: configure.in 14047 2007-08-07 21:01:15Z mcgreevy . +# From configure.in Id: configure.in 14061 2007-08-09 19:46:44Z mcgreevy . # Guess values for system-dependent variables and create Makefiles. # Generated by GNU Autoconf 2.61 for HDF5 1.8.0-beta3post1. # @@ -7028,6 +7028,25 @@ echo "${ECHO_T}none" >&6; } PARALLEL="$CC_BASENAME" ;; + cmpicc) + PARALLEL=cmpicc + { echo "$as_me:$LINENO: checking for cmpirun" >&5 +echo $ECHO_N "checking for cmpirun... $ECHO_C" >&6; } + + if test -x cmpirun; then + { echo "$as_me:$LINENO: result: cmpirun" >&5 +echo "${ECHO_T}cmpirun" >&6; } + RUNSERIAL="${RUNSERIAL:-none}" + + if test -z "$RUNPARALLEL"; then + RUNPARALLEL="cmpirun -np \$\${NPROCS:=3}" + fi + else + { echo "$as_me:$LINENO: result: none" >&5 +echo "${ECHO_T}none" >&6; } + fi + ;; + *) ;; esac @@ -7907,7 +7926,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 7910 "configure"' > conftest.$ac_ext + echo '#line 7929 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -10181,11 +10200,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:10184: $lt_compile\"" >&5) + (eval echo "\"\$as_me:10203: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? 
cat conftest.err >&5 - echo "$as_me:10188: \$? = $ac_status" >&5 + echo "$as_me:10207: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -10449,11 +10468,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:10452: $lt_compile\"" >&5) + (eval echo "\"\$as_me:10471: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:10456: \$? = $ac_status" >&5 + echo "$as_me:10475: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -10553,11 +10572,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:10556: $lt_compile\"" >&5) + (eval echo "\"\$as_me:10575: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:10560: \$? = $ac_status" >&5 + echo "$as_me:10579: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -12933,7 +12952,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext <&5) + (eval echo "\"\$as_me:15407: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15392: \$? = $ac_status" >&5 + echo "$as_me:15411: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -15489,11 +15508,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15492: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15511: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:15496: \$? = $ac_status" >&5 + echo "$as_me:15515: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -17059,11 +17078,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:17062: $lt_compile\"" >&5) + (eval echo "\"\$as_me:17081: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:17066: \$? = $ac_status" >&5 + echo "$as_me:17085: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -17163,11 +17182,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:17166: $lt_compile\"" >&5) + (eval echo "\"\$as_me:17185: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:17170: \$? = $ac_status" >&5 + echo "$as_me:17189: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -19361,11 +19380,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:19364: $lt_compile\"" >&5) + (eval echo "\"\$as_me:19383: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:19368: \$? = $ac_status" >&5 + echo "$as_me:19387: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -19629,11 +19648,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:19632: $lt_compile\"" >&5) + (eval echo "\"\$as_me:19651: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:19636: \$? = $ac_status" >&5 + echo "$as_me:19655: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -19733,11 +19752,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:19736: $lt_compile\"" >&5) + (eval echo "\"\$as_me:19755: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:19740: \$? = $ac_status" >&5 + echo "$as_me:19759: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized diff --git a/configure.in b/configure.in index 221415d..901f3fb 100644 --- a/configure.in +++ b/configure.in @@ -662,6 +662,25 @@ case "$CC_BASENAME" in PARALLEL="$CC_BASENAME" ;; + cmpicc) + dnl The ChaMPIon compiler on NCSA tungsten. The cmpirun command on tungsten + dnl isn't in the same path as cmpicc. + PARALLEL=cmpicc + AC_MSG_CHECKING([for cmpirun]) + + dnl Is there an cmpirun? + if test -x cmpirun; then + AC_MSG_RESULT([cmpirun]) + RUNSERIAL="${RUNSERIAL:-none}" + + if test -z "$RUNPARALLEL"; then + RUNPARALLEL="cmpirun -np \$\${NPROCS:=3}" + fi + else + AC_MSG_RESULT([none]) + fi + ;; + *) dnl Probably not a parallel compiler, but if `--enable-parallel' dnl is defined below then we're still building a parallel hdf5. diff --git a/src/H5Dio.c b/src/H5Dio.c index 6e75a93..259eb06 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -289,6 +289,7 @@ H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, H5D_t *dset = NULL; const H5S_t *mem_space = NULL; const H5S_t *file_space = NULL; + char fake_char; herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_API(H5Dread, FAIL) @@ -326,6 +327,13 @@ H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, if (!buf && H5S_GET_SELECT_NPOINTS(file_space)!=0) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no output buffer") + /* If the buffer is nil, and 0 element is selected, make a fake buffer. + * This is for some MPI package like ChaMPIon on NCSA's tungsten which + * doesn't support this feature. 
+ */ + if (!buf) + buf = &fake_char; + /* read raw data */ if (H5D_read(dset, mem_type_id, mem_space, file_space, plist_id, buf/*out*/) < 0) HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't read data") @@ -373,6 +381,7 @@ H5Dwrite(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, H5D_t *dset = NULL; const H5S_t *mem_space = NULL; const H5S_t *file_space = NULL; + char fake_char; herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_API(H5Dwrite, FAIL) @@ -409,6 +418,13 @@ H5Dwrite(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id, HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not xfer parms") if(!buf && H5S_GET_SELECT_NPOINTS(file_space)!=0) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no output buffer") + + /* If the buffer is nil, and 0 element is selected, make a fake buffer. + * This is for some MPI package like ChaMPIon on NCSA's tungsten which + * doesn't support this feature. + */ + if (!buf) + buf = &fake_char; /* write raw data */ if(H5D_write(dset, mem_type_id, mem_space, file_space, plist_id, buf) < 0) @@ -788,6 +804,8 @@ H5D_contig_read(H5D_io_info_t *io_info, hsize_t nelmts, FUNC_ENTER_NOAPI_NOINIT(H5D_contig_read) + assert (buf); + /* Initialize storage info for this dataset */ if (dataset->shared->dcpl_cache.efl.nused > 0) HDmemcpy(&store.efl, &(dataset->shared->dcpl_cache.efl), sizeof(H5O_efl_t)); @@ -1066,6 +1084,8 @@ H5D_contig_write(H5D_io_info_t *io_info, hsize_t nelmts, FUNC_ENTER_NOAPI_NOINIT(H5D_contig_write) + assert (buf); + /* Initialize storage info for this dataset */ if(dataset->shared->dcpl_cache.efl.nused > 0) HDmemcpy(&store.efl, &(dataset->shared->dcpl_cache.efl), sizeof(H5O_efl_t)); @@ -1344,6 +1364,8 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts, FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read) + assert (buf); + /* Map elements between file and memory for each chunk*/ if(H5D_create_chunk_map(dataset, mem_type, file_space, mem_space, &fm) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't build chunk mapping") @@ 
-1734,6 +1756,8 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts, FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write) + assert (buf); + /* Map elements between file and memory for each chunk*/ if(H5D_create_chunk_map(dataset, mem_type, file_space, mem_space, &fm) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't build chunk mapping") diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 90f8974..a6981ee 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -988,6 +988,7 @@ H5D_link_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, for ( i = 0; i< num_chunk;i++){ if (MPI_SUCCESS != (mpi_code= MPI_Type_free( chunk_mtype+i ))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); + if (MPI_SUCCESS != (mpi_code= MPI_Type_free( chunk_ftype+i ))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code); } @@ -1014,7 +1015,7 @@ H5D_link_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, if(H5DEBUG(D)) HDfprintf(H5DEBUG(D),"before coming to final collective IO\n"); #endif - + if(H5D_final_collective_io(io_info,&chunk_final_ftype,&chunk_final_mtype,&coll_info,buf,do_write)<0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,"couldn't finish MPI-IO"); @@ -1059,7 +1060,6 @@ done: static herr_t H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, hbool_t do_write) { - unsigned i, total_chunk; hsize_t ori_total_chunk; uint8_t *chunk_io_option; @@ -1070,6 +1070,16 @@ H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, H5D_storage_t store; /* union of EFL and chunk pointer in file space */ hbool_t select_chunk; hbool_t last_io_mode_coll = TRUE; + + void *chunk = NULL; /* Pointer to the data chunk in cache */ + H5D_t *dataset=io_info->dset;/* Local pointer to dataset info */ + H5D_istore_ud1_t udata; /*B-tree pass-through */ + haddr_t caddr; /* Address of the cached chunk */ + size_t accessed_bytes; /*total accessed size in a chunk */ + unsigned idx_hint=0; /* Cache index hint */ + hbool_t dirty = 
TRUE; /* Flag for cache flushing */ + hbool_t relax=TRUE; /* Whether whole chunk is selected */ + herr_t ret_value = SUCCEED; #ifdef H5Dmpio_DEBUG int mpi_rank; @@ -1167,18 +1177,41 @@ H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, if(H5D_ioinfo_make_ind(io_info) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O") + /* Load the chunk into cache. But if the whole chunk is written, + * simply allocate space instead of load the chunk. */ + if(HADDR_UNDEF==(caddr = H5D_istore_get_addr(io_info, &udata))) + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list"); + + if (H5D_istore_if_load(dataset, caddr)) { + accessed_bytes = chunk_info->chunk_points * H5T_get_size(dataset->shared->type); + if((do_write && (accessed_bytes != dataset->shared->layout.u.chunk.size)) || !do_write) + relax=FALSE; + + if(NULL == (chunk = H5D_istore_lock(io_info, &udata, relax, &idx_hint))) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + } else + chunk = NULL; + if(do_write) { if((io_info->ops.write)(io_info, chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type), - chunk_info->fspace,chunk_info->mspace,0, NULL, buf) < 0) + chunk_info->fspace,chunk_info->mspace,caddr,chunk, buf) < 0) HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") } else { if((io_info->ops.read)(io_info, chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type), - chunk_info->fspace,chunk_info->mspace,0, NULL, buf) < 0) + chunk_info->fspace,chunk_info->mspace,caddr,chunk, buf) < 0) HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") - } + } + + /* Release the cache lock on the chunk. 
*/ + if (H5D_istore_if_load(dataset, caddr)) { + if(!do_write) dirty = FALSE; + + if(H5D_istore_unlock(io_info, dirty, idx_hint, chunk, accessed_bytes) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") + } #else if(!last_io_mode_coll) /* using independent I/O with file setview.*/ @@ -1250,6 +1283,16 @@ H5D_multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,fm_map *fm,const voi haddr_t chunk_addr; H5SL_node_t *chunk_node; /* Current node in chunk skip list */ H5D_storage_t store; /* union of EFL and chunk pointer in file space */ + H5D_chunk_info_t *chunk_info; /* chunk information */ + hbool_t make_ind, make_coll; /* Flags to indicate that the MPI mode should change */ + + void *chunk = NULL; /* Pointer to the data chunk in cache */ + H5D_t *dataset=io_info->dset;/* Local pointer to dataset info */ + H5D_istore_ud1_t udata; /*B-tree pass-through */ + size_t accessed_bytes; /*total accessed size in a chunk */ + unsigned idx_hint=0; /* Cache index hint */ + hbool_t dirty = TRUE; /* Flag for cache flushing */ + hbool_t relax=TRUE; /* Whether whole chunk is selected */ herr_t ret_value = SUCCEED; #ifdef H5Dmpio_DEBUG @@ -1317,24 +1360,43 @@ H5D_multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,fm_map *fm,const voi if(H5D_ioinfo_make_ind(io_info) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O") + if(HADDR_UNDEF==(chunk_addr = H5D_istore_get_addr(io_info, &udata))) + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list"); + if(make_ind) {/*independent I/O */ - - if(do_write) { - if((io_info->ops.write)(io_info, + /* Load the chunk into cache. But if the whole chunk is written, + * simply allocate space instead of load the chunk. 
*/ + if (H5D_istore_if_load(dataset, chunk_addr)) { + accessed_bytes = chunk_info->chunk_points * H5T_get_size(dataset->shared->type); + if((do_write && (accessed_bytes != dataset->shared->layout.u.chunk.size)) || !do_write) + relax=FALSE; + + if(NULL == (chunk = H5D_istore_lock(io_info, &udata, relax, &idx_hint))) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + } else + chunk = NULL; + + if(do_write) { + if((io_info->ops.write)(io_info, chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type), - chunk_info->fspace,chunk_info->mspace, (hsize_t)0, NULL, buf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") - } - else { - if((io_info->ops.read)(io_info, + chunk_info->fspace,chunk_info->mspace, chunk_addr, chunk, buf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed") + } else { + if((io_info->ops.read)(io_info, chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type), - chunk_info->fspace,chunk_info->mspace, (hsize_t)0, NULL, buf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") - } + chunk_info->fspace,chunk_info->mspace, chunk_addr, chunk, buf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed") + } + + /* Release the cache lock on the chunk. 
*/ + if (H5D_istore_if_load(dataset, chunk_addr)) { + if(!do_write) dirty = FALSE; + + if(H5D_istore_unlock(io_info, dirty, idx_hint, chunk, accessed_bytes) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") + } } else { /*collective I/O */ - if(HADDR_UNDEF==(chunk_addr = H5D_istore_get_addr(io_info,NULL))) - HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list"); if(H5D_inter_collective_io(io_info,chunk_info->fspace,chunk_info->mspace, chunk_addr,buf,do_write ) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,"couldn't finish shared collective MPI-IO"); diff --git a/testpar/t_dset.c b/testpar/t_dset.c index 3c18d82..91349fc 100644 --- a/testpar/t_dset.c +++ b/testpar/t_dset.c @@ -2286,3 +2286,215 @@ compress_readAll(void) } #endif /* H5_HAVE_FILTER_DEFLATE */ +/* + * Part 4--Non-selection for chunked dataset + */ + +/* + * Example of using the parallel HDF5 library to create chunked + * dataset in one HDF5 file with collective and independent parallel + * MPIO access support. The Datasets are of sizes dim0 x dim1. + * Each process controls only a slab of size dim0 x dim1 within the + * dataset with the exception that one processor selects no element. + */ + +void +none_selection_chunk(void) +{ + hid_t fid; /* HDF5 file ID */ + hid_t acc_tpl; /* File access templates */ + hid_t xfer_plist; /* Dataset transfer properties list */ + hid_t sid; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset1, dataset2; /* Dataset ID */ + hbool_t use_gpfs = FALSE; /* Use GPFS hints */ + const char *filename; + hsize_t dims[RANK]; /* dataset dim sizes */ + DATATYPE *data_origin = NULL; /* data buffer */ + DATATYPE *data_array = NULL; /* data buffer */ + hsize_t chunk_dims[RANK]; /* chunk sizes */ + hid_t dataset_pl; /* dataset create prop. 
list */ + + hsize_t start[RANK]; /* for hyperslab setting */ + hsize_t count[RANK]; /* for hyperslab setting */ + hsize_t stride[RANK]; /* for hyperslab setting */ + hsize_t block[RANK]; /* for hyperslab setting */ + hsize_t mstart[RANK]; /* for data buffer in memory */ + + herr_t ret; /* Generic return value */ + int mpi_size, mpi_rank; + + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + filename = GetTestParameters(); + if (VERBOSE_MED) + printf("Extend independent write test on file %s\n", filename); + + /* set up MPI parameters */ + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + /* setup chunk-size. Make sure sizes are > 0 */ + chunk_dims[0] = chunkdim0; + chunk_dims[1] = chunkdim1; + + /* ------------------- + * START AN HDF5 FILE + * -------------------*/ + /* setup file access template */ + acc_tpl = create_faccess_plist(comm, info, facc_type, use_gpfs); + VRFY((acc_tpl >= 0), ""); + + /* create the file collectively */ + fid=H5Fcreate(filename,H5F_ACC_TRUNC,H5P_DEFAULT,acc_tpl); + VRFY((fid >= 0), "H5Fcreate succeeded"); + + /* Release file-access template */ + ret=H5Pclose(acc_tpl); + VRFY((ret >= 0), ""); + + /* -------------------------------------------------------------- + * Define the dimensions of the overall datasets and create them. 
+ * ------------------------------------------------------------- */ + + /* set up dataset storage chunk sizes and creation property list */ + if (VERBOSE_MED) + printf("chunks[]=%lu,%lu\n", (unsigned long)chunk_dims[0], (unsigned long)chunk_dims[1]); + dataset_pl = H5Pcreate(H5P_DATASET_CREATE); + VRFY((dataset_pl >= 0), "H5Pcreate succeeded"); + ret = H5Pset_chunk(dataset_pl, RANK, chunk_dims); + VRFY((ret >= 0), "H5Pset_chunk succeeded"); + + /* setup dimensionality object */ + dims[0] = dim0; + dims[1] = dim1; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + + /* create an extendible dataset collectively */ + dataset1 = H5Dcreate(fid, DATASETNAME1, H5T_NATIVE_INT, sid, dataset_pl); + VRFY((dataset1 >= 0), "H5Dcreate succeeded"); + + /* create another extendible dataset collectively */ + dataset2 = H5Dcreate(fid, DATASETNAME2, H5T_NATIVE_INT, sid, dataset_pl); + VRFY((dataset2 >= 0), "H5Dcreate succeeded"); + + /* release resource */ + H5Sclose(sid); + H5Pclose(dataset_pl); + + /* ------------------------- + * Test collective writing to dataset1 + * -------------------------*/ + /* set up dimensions of the slab this process accesses */ + slab_set(mpi_rank, mpi_size, start, count, stride, block, BYROW); + + /* allocate memory for data buffer. Only allocate enough buffer for + * each processor's data. 
*/ + if (mpi_rank) { + data_origin = (DATATYPE *)malloc(block[0]*block[1]*sizeof(DATATYPE)); + VRFY((data_origin != NULL), "data_origin malloc succeeded"); + + data_array = (DATATYPE *)malloc(block[0]*block[1]*sizeof(DATATYPE)); + VRFY((data_array != NULL), "data_array malloc succeeded"); + + /* put some trivial data in the data_array */ + mstart[0] = mstart[1] = 0; + dataset_fill(mstart, block, data_origin); + MESG("data_array initialized"); + if (VERBOSE_MED){ + MESG("data_array created"); + dataset_print(mstart, block, data_origin); + } + } + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, block, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* Process 0 has no selection */ + if (!mpi_rank) { + ret = H5Sselect_none(mem_dataspace); + VRFY((ret >= 0), "H5Sselect_none succeeded"); + } + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset1); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* Process 0 has no selection */ + if (!mpi_rank) { + ret = H5Sselect_none(file_dataspace); + VRFY((ret >= 0), "H5Sselect_none succeeded"); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret=H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + + /* write data collectively */ + ret = H5Dwrite(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_origin); + VRFY((ret >= 0), "H5Dwrite succeeded"); + + /* read data independently */ + ret = H5Dread(dataset1, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array); + VRFY((ret >= 0), ""); + + /* verify the read data with original expected data */ + if (mpi_rank) { + ret = dataset_vrfy(mstart, count, stride, 
block, data_array, data_origin); + if (ret) nerrors++; + } + + /* ------------------------- + * Test independent writing to dataset2 + * -------------------------*/ + ret=H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_INDEPENDENT); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + + /* write data collectively */ + ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_origin); + VRFY((ret >= 0), "H5Dwrite succeeded"); + + /* read data independently */ + ret = H5Dread(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + H5P_DEFAULT, data_array); + VRFY((ret >= 0), ""); + + /* verify the read data with original expected data */ + if (mpi_rank) { + ret = dataset_vrfy(mstart, count, stride, block, data_array, data_origin); + if (ret) nerrors++; + } + + /* release resource */ + ret=H5Sclose(file_dataspace); + VRFY((ret >= 0), "H5Sclose succeeded"); + ret=H5Sclose(mem_dataspace); + VRFY((ret >= 0), "H5Sclose succeeded"); + ret=H5Pclose(xfer_plist); + VRFY((ret >= 0), "H5Pclose succeeded"); + + + /* close dataset collectively */ + ret=H5Dclose(dataset1); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + ret=H5Dclose(dataset2); + VRFY((ret >= 0), "H5Dclose2 succeeded"); + + /* close the file collectively */ + H5Fclose(fid); + + /* release data buffers */ + if (data_origin) free(data_origin); + if (data_array) free(data_array); +} diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index a45bafa..11ffcb0 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -365,6 +365,9 @@ int main(int argc, char **argv) "extendible dataset collective read", PARATESTFILE); AddTest("eidsetw2", extend_writeInd2, NULL, "extendible dataset independent write #2", PARATESTFILE); + AddTest("selnone", none_selection_chunk, NULL, + "chunked dataset with none-selection", PARATESTFILE); + AddTest("calloc", test_chunk_alloc, NULL, "parallel extend Chunked allocation on serial file", PARATESTFILE); AddTest("fltread", test_filter_read, NULL, diff --git 
a/testpar/testphdf5.h b/testpar/testphdf5.h index 06878a5..8a6d897 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -211,6 +211,7 @@ void dataset_readInd(void); void dataset_readAll(void); void extend_readInd(void); void extend_readAll(void); +void none_selection_chunk(void); void test_chunk_alloc(void); void test_filter_read(void); void compact_dataset(void); -- cgit v0.12