From 03ccb39fa9c857190f85b04174e01af4c3c1af32 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Thu, 27 Jun 2002 10:09:12 -0500 Subject: [svn-r5720] Purpose: New Feature Description: Added support for collective I/O with MPI and HDF5 access methods. Platforms tested: IRIX64 6.5 (modi4) w/parallel --- perform/pio_engine.c | 230 ++++++++++++++++++++++++++++++++------------------- perform/pio_perf.c | 38 +++++++-- perform/pio_perf.h | 1 + 3 files changed, 176 insertions(+), 93 deletions(-) diff --git a/perform/pio_engine.c b/perform/pio_engine.c index ca3ceeb..a9499d3 100644 --- a/perform/pio_engine.c +++ b/perform/pio_engine.c @@ -464,7 +464,6 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, size_t nelmts_in_buf; /*how many element the buffer holds */ off_t elmts_begin; /*first elmt this process transfer */ off_t elmts_count; /*number of elmts this process transfer */ - hid_t dcpl = -1; /* Dataset creation property list */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -472,17 +471,12 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, hid_t h5dset_space_id = -1; /*dataset space ID */ hid_t h5mem_space_id = -1; /*memory dataspace ID */ hid_t h5ds_id = -1; /*dataset handle */ - hsize_t h5mem_block[1]; /*memory space selection */ - hsize_t h5mem_stride[1]; - hsize_t h5mem_count[1]; - hssize_t h5mem_start[1]; -#if 0 - /* for future implementation */ - hsize_t h5dset_block[1]; /*dset space selection */ - hsize_t h5dset_stride[1]; - hsize_t h5dset_count[1]; - hssize_t h5dset_start[1]; -#endif + hsize_t h5block[1]; /*dataspace selection */ + hsize_t h5stride[1]; + hsize_t h5count[1]; + hssize_t h5start[1]; + hid_t h5dcpl = -1; /* Dataset creation property list */ + hid_t h5dxpl = -1; /* Dataset transfer property list */ /* calculate dataset parameters. data type is always native C int */ dset_size = nelmts * (off_t)ELMT_SIZE; @@ -501,7 +495,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, VRFY((h5dset_space_id >= 0), "H5Screate"); } /* end else */ - /* create the memory dataspace that corresponds to the xfer buffer */ + /* Create the memory dataspace that corresponds to the xfer buffer */ if(nelmts_in_buf>0) { h5dims[0] = nelmts_in_buf; h5mem_space_id = H5Screate_simple(1, h5dims, NULL); @@ -511,6 +505,22 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, h5mem_space_id = H5Screate(H5S_SCALAR); VRFY((h5mem_space_id >= 0), "H5Screate"); } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + fprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if(parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ } for (ndset = 1; ndset <= ndsets; ++ndset) { @@ -526,8 +536,8 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, break; case PHDF5: - dcpl = H5Pcreate(H5P_DATASET_CREATE); - if (dcpl < 0) { + h5dcpl = H5Pcreate(H5P_DATASET_CREATE); + if (h5dcpl < 0) { fprintf(stderr, "HDF5 Property List Create failed\n"); GOTOERROR(FAIL); } @@ -536,7 +546,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, if(parms->h5_use_chunks) { /* Set the chunk size to be the same as the buffer size */ h5dims[0] = nelmts_in_buf; - hrc = H5Pset_chunk(dcpl, 1, h5dims); + hrc = H5Pset_chunk(h5dcpl, 1, h5dims); if (hrc < 0) { fprintf(stderr, "HDF5 Property List Set failed\n"); GOTOERROR(FAIL); @@ -546,7 +556,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, #ifdef H5_HAVE_NOFILL /* Disable writing fill values if asked */ if(parms->h5_no_fill) { - hrc = H5Pset_fill_time(dcpl, H5D_FILL_TIME_NEVER); + hrc = H5Pset_fill_time(h5dcpl, H5D_FILL_TIME_NEVER); if (hrc < 0) { fprintf(stderr, "HDF5 Property List Set failed\n"); GOTOERROR(FAIL); @@ -556,14 +566,14 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, sprintf(dname, "Dataset_%ld", ndset); h5ds_id = H5Dcreate(fd->h5fd, dname, ELMT_H5_TYPE, - h5dset_space_id, dcpl); + h5dset_space_id, h5dcpl); if (h5ds_id < 0) { fprintf(stderr, "HDF5 Dataset Create failed\n"); GOTOERROR(FAIL); } - hrc = H5Pclose(dcpl); + hrc = H5Pclose(h5dcpl); /* verifying the close of the dcpl */ if (hrc < 0) { fprintf(stderr, "HDF5 Property List Close failed\n"); @@ -718,44 +728,54 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; } /* end else */ - mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + if(parms->collective==0) { + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, + (int)nelmts_toxfer, ELMT_MPI_TYPE, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + } /* end if */ + else { + mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer, + (int)nelmts_toxfer, ELMT_MPI_TYPE, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + } /* end else */ break; case PHDF5: - /*set up the dset space id to select the segment to process */ - { - if (parms->interleaved==0){ - /* Contiguous pattern */ - h5mem_start[0] = elmts_begin + nelmts_xfer; - } /* end if */ - else { - /* Interleaved access pattern */ - /* Skip offset over blocks of other processes */ - h5mem_start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); - } /* end else */ - h5mem_stride[0] = h5mem_block[0] = nelmts_toxfer; - h5mem_count[0] = 1; - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5mem_start, h5mem_stride, h5mem_count, h5mem_block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /*setup the memory space id too. Only start is different */ - h5mem_start[0] = 0; + /* Set up the file dset space id to select the segment to process */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5start[0] = elmts_begin + nelmts_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); + } /* end else */ + h5stride[0] = h5block[0] = nelmts_toxfer; + h5count[0] = 1; + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, + h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sset_hyperslab"); + + /* Only need selection in memory dataset if it is smaller than the whole buffer */ + if(nelmts_toxfer= 0), "H5Sset_hyperslab"); - } + } /* end if */ /* set write time here */ hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, - h5dset_space_id, H5P_DEFAULT, buffer); + h5dset_space_id, h5dxpl, buffer); VRFY((hrc >= 0), "H5Dwrite"); break; - } + } /* switch (parms->io_type) */ + /* Increment number of elements transferred */ nelmts_xfer += nelmts_toxfer; } @@ -799,6 +819,16 @@ done: } } + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + fprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } else { + h5dxpl = -1; + } + } + return ret_code; } @@ -835,17 +865,11 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, hid_t h5dset_space_id = -1; /*dataset space ID */ hid_t h5mem_space_id = -1; /*memory dataspace ID */ hid_t h5ds_id = -1; /*dataset handle */ - hsize_t h5mem_block[1]; /*memory space selection */ - hsize_t h5mem_stride[1]; - hsize_t h5mem_count[1]; - hssize_t h5mem_start[1]; -#if 0 - /* for future implementation */ - hsize_t h5dset_block[1]; /*dset space selection */ - hsize_t h5dset_stride[1]; - hsize_t h5dset_count[1]; - hssize_t h5dset_start[1]; -#endif + hsize_t h5block[1]; /*dataspace selection */ + hsize_t h5stride[1]; + hsize_t h5count[1]; + hssize_t h5start[1]; + hid_t h5dxpl = -1; /* Dataset transfer property list */ /* calculate dataset parameters. data type is always native C int */ dset_size = nelmts * (off_t)ELMT_SIZE; @@ -864,7 +888,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, VRFY((h5dset_space_id >= 0), "H5Screate"); } /* end else */ - /* create the memory dataspace that corresponds to the xfer buffer */ + /* Create the memory dataspace that corresponds to the xfer buffer */ if(nelmts_in_buf>0) { h5dims[0] = nelmts_in_buf; h5mem_space_id = H5Screate_simple(1, h5dims, NULL); @@ -874,7 +898,23 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, h5mem_space_id = H5Screate(H5S_SCALAR); VRFY((h5mem_space_id >= 0), "H5Screate"); } /* end else */ - } + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + fprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if(parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + } /* end if */ for (ndset = 1; ndset <= ndsets; ++ndset) { /* Calculate dataset offset within a file */ @@ -1035,40 +1075,49 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; } /* end else */ - mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_read"); + if(parms->collective==0) { + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, + (int)nelmts_toxfer, ELMT_MPI_TYPE, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_read"); + } /* end if */ + else { + mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer, + (int)nelmts_toxfer, ELMT_MPI_TYPE, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_read"); + } /* end else */ break; case PHDF5: - /*set up the dset space id to select the segment to process */ - { - if (parms->interleaved==0){ - /* Contiguous pattern */ - h5mem_start[0] = elmts_begin + nelmts_xfer; - } /* end if */ - else { - /* Interleaved access pattern */ - /* Skip offset over blocks of other processes */ - h5mem_start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); - } /* end else */ - h5mem_stride[0] = h5mem_block[0] = nelmts_toxfer; - h5mem_count[0] = 1; - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5mem_start, h5mem_stride, h5mem_count, h5mem_block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /*setup the memory space id too. Only start is different */ - h5mem_start[0] = 0; + /* Set up the dset space id to select the segment to process */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5start[0] = elmts_begin + nelmts_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); + } /* end else */ + h5stride[0] = h5block[0] = nelmts_toxfer; + h5count[0] = 1; + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, + h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sset_hyperslab"); + + /* Only need selection in memory dataset if it is smaller than the whole buffer */ + if(nelmts_toxfer= 0), "H5Sset_hyperslab"); - } + } /* end if */ /* set read time here */ hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, - h5dset_space_id, H5P_DEFAULT, buffer); + h5dset_space_id, h5dxpl, buffer); VRFY((hrc >= 0), "H5Dread"); break; } /* switch (parms->io_type) */ @@ -1099,6 +1148,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, } } /* if (parms->verify) */ + /* Increment number of elements transferred */ nelmts_xfer += nelmts_toxfer; } @@ -1142,6 +1192,16 @@ done: } } + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + fprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } else { + h5dxpl = -1; + } + } + return ret_code; } diff --git a/perform/pio_perf.c b/perform/pio_perf.c index f2fb52f..0f43f15 100644 --- a/perform/pio_perf.c +++ b/perform/pio_perf.c @@ -117,9 +117,9 @@ static const char *progname = "pio_perf"; * adding more, make sure that they don't clash with each other. */ #if 1 -static const char *s_opts = "ha:A:cD:f:P:p:X:x:nd:F:i:Io:stT:w"; +static const char *s_opts = "ha:A:cCD:f:P:p:X:x:nd:F:i:Io:stT:w"; #else -static const char *s_opts = "ha:A:bcD:f:P:p:X:x:nd:F:i:Io:stT:w"; +static const char *s_opts = "ha:A:bcCD:f:P:p:X:x:nd:F:i:Io:stT:w"; #endif /* 1 */ static struct long_options l_opts[] = { { "help", no_arg, 'h' }, @@ -143,6 +143,15 @@ static struct long_options l_opts[] = { { "chun", no_arg, 'c' }, { "chu", no_arg, 'c' }, { "ch", no_arg, 'c' }, + { "collective", no_arg, 'C' }, + { "collectiv", no_arg, 'C' }, + { "collecti", no_arg, 'C' }, + { "collect", no_arg, 'C' }, + { "collec", no_arg, 'C' }, + { "colle", no_arg, 'C' }, + { "coll", no_arg, 'C' }, + { "col", no_arg, 'C' }, + { "co", no_arg, 'C' }, { "debug", require_arg, 'D' }, { "debu", require_arg, 'D' }, { "deb", require_arg, 'D' }, @@ -260,6 +269,7 @@ struct options { size_t max_xfer_size; /* maximum transfer buffer size */ size_t min_xfer_size; /* minimum transfer buffer size */ unsigned interleaved; /* Interleaved vs. contiguous blocks */ + unsigned collective; /* Collective vs. independent I/O */ int print_times; /* print times as well as throughputs */ int print_raw; /* print raw data throughput info */ off_t h5_alignment; /* alignment in HDF5 file */ @@ -401,6 +411,7 @@ run_test_loop(struct options *opts) parms.num_dsets = opts->num_dsets; parms.num_iters = opts->num_iters; parms.interleaved = opts->interleaved; + parms.collective = opts->collective; parms.h5_align = opts->h5_alignment; parms.h5_thresh = opts->h5_threshold; parms.h5_use_chunks = opts->h5_use_chunks; @@ -1001,11 +1012,17 @@ report_parameters(struct options *opts) recover_size_and_print((long_long)opts->min_xfer_size, ":"); recover_size_and_print((long_long)opts->max_xfer_size, "\n"); - HDfprintf(output, "rank %d: Block Pattern in Dataset:", rank); + HDfprintf(output, "rank %d: Block Pattern in Dataset=", rank); if(opts->interleaved) - HDfprintf(output, "Interleaved"); + HDfprintf(output, "Interleaved\n"); else - HDfprintf(output, "Contiguous"); + HDfprintf(output, "Contiguous\n"); + + HDfprintf(output, "rank %d: I/O Method for MPI and HDF5=", rank); + if(opts->collective) + HDfprintf(output, "Collective\n"); + else + HDfprintf(output, "Independent\n"); { char *prefix = getenv("HDF5_PARAPREFIX"); @@ -1048,6 +1065,7 @@ parse_command_line(int argc, char *argv[]) cl_opts->max_xfer_size = 1 * ONE_MB; cl_opts->min_xfer_size = 128 * ONE_KB; cl_opts->interleaved = 0; /* Default to contiguous blocks in dataset */ + cl_opts->collective = 0; /* Default to independent I/O access */ cl_opts->print_times = FALSE; /* Printing times is off by default */ cl_opts->print_raw = FALSE; /* Printing raw data throughput is off by default */ cl_opts->h5_alignment = 1; /* No alignment for HDF5 objects by default */ @@ -1105,6 +1123,9 @@ parse_command_line(int argc, char *argv[]) /* Turn on chunked HDF5 dataset creation */ cl_opts->h5_use_chunks = TRUE; break; + case 'C': + cl_opts->collective = 1; + break; case 'd': cl_opts->num_dsets = atoi(opt_arg); break; @@ -1199,15 +1220,15 @@ parse_command_line(int argc, char *argv[]) case 'T': cl_opts->h5_threshold = parse_size_directive(opt_arg); break; + case 'w': + cl_opts->h5_write_only = TRUE; + break; case 'x': cl_opts->min_xfer_size = parse_size_directive(opt_arg); break; case 'X': cl_opts->max_xfer_size = parse_size_directive(opt_arg); break; - case 'w': - cl_opts->h5_write_only = TRUE; - break; case 'h': case '?': default: @@ -1298,6 +1319,7 @@ usage(const char *prog) printf(" -b, --binary The elusive binary option\n"); #endif /* 0 */ printf(" -c, --chunk Create HDF5 datasets chunked [default: off]\n"); + printf(" -C, --collective Use collective I/O for MPI and HDF5 APIs [default: off (i.e. independent I/O)]\n"); printf(" -d N, --num-dsets=N Number of datasets per file [default:1]\n"); printf(" -D DL, --debug=DL Indicate the debugging level\n"); printf(" [default: no debugging]\n"); diff --git a/perform/pio_perf.h b/perform/pio_perf.h index 53377e2..e245e8a 100644 --- a/perform/pio_perf.h +++ b/perform/pio_perf.h @@ -40,6 +40,7 @@ typedef struct parameters_ { int num_iters; /* Number of times to loop doing the IO */ size_t buf_size; /* Buffer size */ unsigned interleaved; /* Interleaved vs. contiguous blocks */ + unsigned collective; /* Collective vs. independent I/O */ hsize_t h5_align; /* HDF5 object alignment */ hsize_t h5_thresh; /* HDF5 object alignment threshold */ int h5_use_chunks; /* Make HDF5 dataset chunked */ -- cgit v0.12