diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2002-07-01 17:20:36 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2002-07-01 17:20:36 (GMT) |
commit | 7c1ec01796efbb3c934b65f90fe97f34791612b5 (patch) | |
tree | 8ba096c805385e1126945e9985bd75c58abc470e | |
parent | b9b0c7e6f916f82386e4eda5566b5a4b6138e9a4 (diff) | |
download | hdf5-7c1ec01796efbb3c934b65f90fe97f34791612b5.zip hdf5-7c1ec01796efbb3c934b65f90fe97f34791612b5.tar.gz hdf5-7c1ec01796efbb3c934b65f90fe97f34791612b5.tar.bz2 |
[svn-r5735] Purpose:
Lots of changes.
Description:
Added the "block size" stuff for MPI-I/O.
Rearranged the engine to use bytes as the basic unit for everything,
instead of converting back and forth between ints & bytes.
Hoisted lots of invariant code out of main benchmark loop.
Platforms tested:
IRIX64 6.5 (modi4) w/parallel
-rw-r--r-- | perform/pio_engine.c | 1345 | ||||
-rw-r--r-- | perform/pio_perf.c | 123 | ||||
-rw-r--r-- | perform/pio_perf.h | 6 |
3 files changed, 781 insertions, 693 deletions
diff --git a/perform/pio_engine.c b/perform/pio_engine.c index b43cf39..e396b52 100644 --- a/perform/pio_engine.c +++ b/perform/pio_engine.c @@ -45,9 +45,9 @@ /* sizes of various items. these sizes won't change during program execution */ /* The following three must have the same type */ -#define ELMT_SIZE (sizeof(int)) /* we're doing ints */ -#define ELMT_MPI_TYPE MPI_INT -#define ELMT_H5_TYPE H5T_NATIVE_INT +#define ELMT_SIZE (sizeof(unsigned char)) /* we're doing bytes */ +#define ELMT_MPI_TYPE MPI_BYTE +#define ELMT_H5_TYPE H5T_NATIVE_UCHAR #define GOTOERROR(errcode) { ret_code = errcode; goto done; } #define GOTODONE { goto done; } @@ -121,9 +121,9 @@ typedef union _file_descr { static char *pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size); static herr_t do_write(results *res, file_descr *fd, parameters *parms, - long ndsets, off_t nelmts, size_t blk_size, size_t buf_size, void *buffer); + long ndsets, off_t nelmts, size_t buf_size, void *buffer); static herr_t do_read(results *res, file_descr *fd, parameters *parms, - long ndsets, off_t nelmts, size_t blk_size, size_t buf_size, void *buffer /*out*/); + long ndsets, off_t nelmts, size_t buf_size, void *buffer /*out*/); static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); static herr_t do_fclose(iotype iot, file_descr *fd); @@ -131,6 +131,7 @@ static void do_cleanupfile(iotype iot, char *fname); #ifdef H5_HAVE_GPFS /* GPFS-specific functions */ +#ifdef H5_HAVE_GPFS static void access_range(int handle, off_t start, off_t length, int is_write); static void free_range(int handle, off_t start, off_t length); static void clear_file_cache(int handle); @@ -138,7 +139,7 @@ static void cancel_hints(int handle); static void start_data_shipping(int handle, int num_insts); static void stop_data_shipping(int handle); static void invalidate_file_cache(const char *filename); -#endif +#endif /* H5_HAVE_GPFS */ /* * Function: do_pio @@ -158,13 +159,12 @@ do_pio(parameters param) iotype iot; char fname[FILENAME_MAX]; - int maxprocs; - long nfiles, nf; + long nf; long ndsets; - off_t nelmts; + off_t nbytes; /* Number of bytes per dataset */ char *buffer = NULL; /*data buffer pointer */ size_t buf_size; /*data buffer size in bytes */ - size_t blk_size; /*interleaved I/O block size */ + size_t blk_size; /*data block size in bytes */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -193,17 +193,15 @@ do_pio(parameters param) GOTOERROR(FAIL); } - nfiles = param.num_files; /* number of files */ ndsets = param.num_dsets; /* number of datasets per file */ - nelmts = param.num_elmts; /* number of elements per dataset */ - maxprocs = param.num_procs; /* max number of mpi-processes to use */ + nbytes = param.num_bytes; /* number of bytes per dataset */ buf_size = param.buf_size; - blk_size = param.block_size; /* interleaved IO block size */ + blk_size = param.blk_size; - if (nfiles < 0 ) { + if (param.num_files < 0 ) { fprintf(stderr, "number of files must be >= 0 (%ld)\n", - nfiles); + param.num_files); GOTOERROR(FAIL); } @@ -214,46 +212,44 @@ do_pio(parameters param) GOTOERROR(FAIL); } - if (maxprocs <= 0 ) { + if (param.num_procs <= 0 ) { fprintf(stderr, "maximum number of process to use must be > 0 (%d)\n", - maxprocs); + param.num_procs); GOTOERROR(FAIL); } - /* allocate transfer buffer */ + /* Validate transfer buffer size & block size*/ + if(blk_size<=0) { + HDfprintf(stderr, + "Transfer block size (%Hd) must be > 0\n", (long_long)blk_size); + GOTOERROR(FAIL); + } /* end if */ if(buf_size<=0) { HDfprintf(stderr, "Transfer buffer size (%Hd) must be > 0\n", (long_long)buf_size); GOTOERROR(FAIL); - }else{ - buffer = malloc(buf_size); - - if (buffer == NULL){ - HDfprintf(stderr, "malloc for transfer buffer size (%Hd) failed\n", - (long_long)buf_size); - GOTOERROR(FAIL); - } + } /* end if */ + if ((buf_size % blk_size) != 0){ + HDfprintf(stderr, + "Transfer buffer size (%Hd) must be a multiple of the " + "interleaved I/O block size (%Hd)\n", + (long_long)buf_size, (long_long)blk_size); + GOTOERROR(FAIL); } + if((nbytes%buf_size)!=0) { + HDfprintf(stderr, + "Dataset size (%Hd) must be a multiple of the " + "trasfer buffer size (%Hd)\n", + (long_long)nbytes, (long_long)buf_size); + GOTOERROR(FAIL); + } /* end if */ - /* Should only need blk_size <= buf_size. */ - /* More restrictive condition for easier implementation for now. */ - if (blk_size > 0 ){ - if ((buf_size % blk_size) != 0){ - HDfprintf(stderr, - "Transfer buffer size (%Hd) must be a multiple of the " - "interleaved I/O block size (%Hd)\n", - (long_long)buf_size, (long_long)blk_size); - GOTOERROR(FAIL); - } - - if ((nelmts % (buf_size / ELMT_SIZE)) != 0){ - HDfprintf(stderr, - "Dataset size (%Hd) must be a multiple of the " - "transfer buffer size (%Hd)\n", - (long_long)nelmts, (long_long)(buf_size / ELMT_SIZE)); - GOTOERROR(FAIL); - } + /* Allocate transfer buffer */ + if ((buffer = malloc(buf_size)) == NULL){ + HDfprintf(stderr, "malloc for transfer buffer size (%Hd) failed\n", + (long_long)(buf_size)); + GOTOERROR(FAIL); } if (pio_debug_level >= 4) { @@ -266,7 +262,7 @@ do_pio(parameters param) fprintf(output, "Timer details:\n"); } - for (nf = 1; nf <= nfiles; nf++) { + for (nf = 1; nf <= param.num_files; nf++) { /* * Write performance measurement */ @@ -285,7 +281,7 @@ do_pio(parameters param) VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, START); - hrc = do_write(&res, &fd, ¶m, ndsets, nelmts, blk_size, buf_size, buffer); + hrc = do_write(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_write failed"); @@ -313,7 +309,7 @@ do_pio(parameters param) VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, START); - hrc = do_read(&res, &fd, ¶m, ndsets, nelmts, blk_size, buf_size, buffer); + hrc = do_read(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_read failed"); @@ -478,24 +474,31 @@ pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t si */ static herr_t do_write(results *res, file_descr *fd, parameters *parms, long ndsets, - off_t nelmts, size_t blk_size, size_t buf_size, void *buffer) + off_t nbytes, size_t buf_size, void *buffer) { int ret_code = SUCCESS; int rc; /*routine return code */ - int mrc; /*MPI return code */ - MPI_Offset mpi_offset; - MPI_Status mpi_status; long ndset; - off_t nelmts_xfer; - size_t nelmts_toxfer; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ char dname[64]; - off_t dset_offset=0; /*dataset offset in a file */ - off_t file_offset; /*file offset of the next transfer */ - off_t dset_size; /*one dataset size in bytes */ - size_t nelmts_in_buf; /*how many element the buffer holds */ - size_t nelmts_in_blk=0; /*how many element a block holds */ - off_t elmts_begin; /*first elmt this process transfer */ - off_t elmts_count; /*number of elmts this process transfer */ + off_t dset_offset=0; /*dataset offset in a file */ + off_t bytes_begin; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* Ffile offset of the next transfer */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset;/* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Datatype mpi_file_type; /* MPI derived type for file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for buffer */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -503,57 +506,146 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, hid_t h5dset_space_id = -1; /*dataset space ID */ hid_t h5mem_space_id = -1; /*memory dataspace ID */ hid_t h5ds_id = -1; /*dataset handle */ - hsize_t h5block[1]; /*dataspace selection */ + hsize_t h5block[1]; /*dataspace selection */ hsize_t h5stride[1]; hsize_t h5count[1]; hssize_t h5start[1]; + hssize_t h5offset[1]; /* Selection offset within dataspace */ hid_t h5dcpl = -1; /* Dataset creation property list */ hid_t h5dxpl = -1; /* Dataset transfer property list */ - /* calculate dataset parameters. data type is always native C int */ - dset_size = nelmts * (off_t)ELMT_SIZE; - nelmts_in_buf = buf_size/ELMT_SIZE; - - /* hdf5 data space setup */ - if (parms->io_type == PHDF5){ - if(nelmts>0) { - /* define a contiquous dataset of nelmts native ints */ - h5dims[0] = nelmts; - h5dset_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5dset_space_id >= 0), "H5Screate_simple"); - } /* end if */ - else { - h5dset_space_id = H5Screate(H5S_SCALAR); - VRFY((h5dset_space_id >= 0), "H5Screate"); - } /* end else */ - - /* Create the memory dataspace that corresponds to the xfer buffer */ - if(nelmts_in_buf>0) { - h5dims[0] = nelmts_in_buf; - h5mem_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5mem_space_id >= 0), "H5Screate_simple"); - } /* end if */ - else { - h5mem_space_id = H5Screate(H5S_SCALAR); - VRFY((h5mem_space_id >= 0), "H5Screate"); - } /* end else */ - - /* Create the dataset transfer property list */ - h5dxpl = H5Pcreate(H5P_DATASET_XFER); - if (h5dxpl < 0) { - fprintf(stderr, "HDF5 Property List Create failed\n"); - GOTOERROR(FAIL); - } + /* Get the parameters from the parameter block */ + blk_size=parms->blk_size; + + /* Prepare buffer for verifying data */ + if (parms->verify) + memset(buffer,pio_mpi_rank_g,buf_size); + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + if (parms->interleaved==0) { + /* Contiguous Pattern: */ + bytes_begin = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g); + } /* end if */ + else { + /* Interleaved Pattern: */ + bytes_begin = (off_t)(blk_size*pio_mpi_rank_g); + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g) + - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + HDfprintf(output, "Debug(do_write): " + "buf_size=%Hd, bytes_begin=%Hd, bytes_count=%Hd\n", + (long_long)buf_size, (long_long)bytes_begin, + (long_long)bytes_count); + } - /* Change to collective I/O, if asked */ - if(parms->collective) { - hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); - if (hrc < 0) { - fprintf(stderr, "HDF5 Property List Set failed\n"); + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, + MPI_BYTE, &mpi_blk_type); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1, + (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit( &mpi_file_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit( &mpi_blk_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + break; + + case PHDF5: /* HDF5 setup */ + if(nbytes>0) { + /* define a contiquous dataset of nbytes native bytes */ + h5dims[0] = nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5start[0] = bytes_begin; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size/blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = bytes_begin; + h5stride[0] = blk_size*pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size/blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, + h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if(buf_size>0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + fprintf(stderr, "HDF5 Property List Create failed\n"); GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if(parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ } /* end if */ - } /* end if */ - } + break; + } /* end switch */ for (ndset = 1; ndset <= ndsets; ++ndset) { @@ -564,7 +656,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, case POSIXIO: case MPIO: /* both posix and mpi io just need dataset offset in file*/ - dset_offset = (ndset - 1) * dset_size; + dset_offset = (ndset - 1) * nbytes; break; case PHDF5: @@ -577,7 +669,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, /* Make the dataset chunked if asked */ if(parms->h5_use_chunks) { /* Set the chunk size to be the same as the buffer size */ - h5dims[0] = nelmts_in_buf; + h5dims[0] = buf_size; hrc = H5Pset_chunk(h5dcpl, 1, h5dims); if (hrc < 0) { fprintf(stderr, "HDF5 Property List Set failed\n"); @@ -606,7 +698,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, } hrc = H5Pclose(h5dcpl); - /* verifying the close of the h5dcpl */ + /* verifying the close of the dcpl */ if (hrc < 0) { fprintf(stderr, "HDF5 Property List Close failed\n"); GOTOERROR(FAIL); @@ -615,268 +707,191 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, break; } - /* There are two kinds of transfer patterns, contiguous and interleaved. - * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n - * where n is rank of the last process. - * In contiguous pattern, data are accessed as - * 000...111...222...nnn... - * In interleaved pattern, data are accessed as - * 012...n012...n... - * These are all in the scope of one dataset. - */ - /* Calculate the total number of elements (elmts_count) to be - * transferred by this process. It may be different for different - * transfer pattern due to rounding to integral values. - */ - if (blk_size==0){ - /* Contiguous Pattern: - * Calculate the beginning element of this process and the next. - * elmts_count is the difference between these two beginnings. - * This way, it eliminates any rounding errors. - */ - elmts_begin = (off_t)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g); - - /* Do not cast elmt_begin to other types, especially non-integral - * types, else it may introduce rounding discrepency. */ - if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1)) - elmts_count = (off_t)(((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) - - elmts_begin; - else - /* last process. Take whatever are left */ - elmts_count = nelmts - elmts_begin; - } /* Contiguous pattern */ - else{ - /* Interleaved Pattern: - * Each process takes blk_size of elements, starting with the first - * process. So, the last process may have fewer or even none. - * Calculate the beginning element of this process and the next. - * The elmnts_begin here marks only the beginning of the first - * block accessed by this process. - */ - /* Algorithm: - * First allocate equal blocks per process, i.e. one block each - * process for every block_size*nprocs. - * If there is remaining unallocated, give a block each to process - * starting at proc 0. The last process may get a partial block. - */ - off_t remain_nelmts, remain_begin; /* unallocated remaining*/ - - nelmts_in_blk = blk_size/ELMT_SIZE; - elmts_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); - - /* must use integer calculation next */ - /* allocate equal blocks per process */ - elmts_count = (nelmts / (off_t)(nelmts_in_blk*pio_mpi_nprocs_g)) * - (off_t)nelmts_in_blk; - remain_nelmts = nelmts % ((off_t)(nelmts_in_blk*pio_mpi_nprocs_g)); - - /* allocate any remaining */ - remain_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); - if (remain_nelmts > remain_begin){ - /* it gets something */ - if (remain_nelmts > (remain_begin+(off_t)nelmts_in_blk)){ - /* one full block */ - elmts_count += nelmts_in_blk; - }else{ - /* only a partial block */ - elmts_count += remain_nelmts - remain_begin; - } - } - } /* Interleaved Pattern */ - /* debug */ - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_write): " - "nelmts_in_blk=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", - (long_long)nelmts_in_blk, (long_long)elmts_begin, - (long_long)elmts_count); - } - - - /* The task is to transfer elmts_count elements, starting at - * elmts_begin position, using transfer buffer of buf_size bytes. - * If blk_size > 0, select blk_size at a time, in round robin + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin * fashion, according to number of process. Otherwise, select - * all elmt_count in contiguous. + * all bytes_count in contiguous. */ - nelmts_xfer = 0 ; - - /* Start "raw data" write timer */ - set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, START); + nbytes_xfer = 0 ; - while (nelmts_xfer < elmts_count){ - /* transfer one buffer of data each round */ - /* Note: because size_t is unsigned, avoid expressions that */ - /* can be negative. */ - if ((nelmts_xfer + (off_t)nelmts_in_buf) <= elmts_count) { - nelmts_toxfer = nelmts_in_buf; - } else { - /* last transfer of a partial buffer */ - nelmts_toxfer = elmts_count - nelmts_xfer; - } + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin; - if (parms->verify) { - /*Prepare write data for verify later*/ - int *intptr = (int *)buffer; - size_t i; + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin); - for (i = 0; i < nelmts_toxfer; ++i) - *intptr++ = pio_mpi_rank_g; - } + /* Start "raw data" write timer */ + set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, START); + while (nbytes_xfer < bytes_count){ /* Write */ /* Calculate offset of write within a dataset/file */ switch (parms->io_type) { case POSIXIO: - if (blk_size==0){ - /* Contiguous pattern */ - /* need to (off_t) the elmnts_begin expression because they */ - /* may be of smaller sized integer types */ - file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == - POSIXWRITE(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXWRITE"); - }else{ - /* interleaved access pattern */ - char *buf_p=buffer; - size_t xferred=0; - size_t toxfer=0; - - file_offset = dset_offset + - (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_write): " - "nelmts_toxfer=%Hd, nelmts_xfer=%Hd\n", - (long_long)nelmts_toxfer, (long_long)nelmts_xfer); - } - while (xferred < nelmts_toxfer){ - if ((nelmts_toxfer - xferred) >= nelmts_in_blk) - toxfer = nelmts_in_blk; - else - toxfer = nelmts_toxfer - xferred; - /* Skip offset over blocks of other processes */ - file_offset = dset_offset + - (off_t)(elmts_begin + (nelmts_xfer+xferred)*pio_mpi_nprocs_g)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_write): " - "nelmts_toxfer=%Hd, nelmts_xfer=%Hd" - ", toxfer=%Hd, xferred=%Hd" - ", file_offset=%Hd" - "\n", - (long_long)nelmts_toxfer, (long_long)nelmts_xfer, - (long_long)toxfer, (long_long)xferred, - (long_long)file_offset); - } - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are written */ - rc = ((ssize_t)(toxfer*ELMT_SIZE) == - POSIXWRITE(fd->posixfd, buf_p, toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXWRITE"); - xferred += toxfer; - } - } - break; - - case MPIO: - if (blk_size==0) { - /* Contiguous pattern */ - mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - } /* Contiguous pattern */ + /* Contiguous pattern */ + if (parms->interleaved==0) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)buf_size; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)buf_size == + POSIXWRITE(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end if */ + /* Interleaved access pattern */ else { - /* Interleaved access pattern */ - /* Skip offset over blocks of other processes */ - mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + /* Set the base of user's buffer */ + buf_p=(unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while(nbytes_toxfer>0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + + (off_t)(nbytes_xfer*pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)blk_size == + POSIXWRITE(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p+=blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer+=blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer-=blk_size; + } /* end while */ } /* end else */ + break; + case MPIO: + /* Independent file access */ if(parms->collective==0) { - mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + /* Contiguous pattern */ + if (parms->interleaved==0){ + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + nbytes_xfer; + + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size/blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p=(unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while(nbytes_toxfer>0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + + (nbytes_xfer*pio_mpi_nprocs_g); + + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p, + (int)1, mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance location in buffer */ + buf_p+=blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer+=blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer-=blk_size; + } /* end while */ + } /* end else */ } /* end if */ + /* Collective file access */ else { - mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + /* Contiguous access pattern */ + if (parms->interleaved==0){ + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + nbytes_xfer; + + /* Perform independent write */ + mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size/blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + (nbytes_xfer*pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, + mpi_file_type, (char*)"native", h5_io_info_g); + VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform write */ + mrc = MPI_File_write_at_all(fd->mpifd, 0, buffer, + (int)(buf_size/blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end else */ } /* end else */ break; case PHDF5: - /*set up the dset space id to select the segment to process */ - if (blk_size==0){ - /* Contiguous pattern */ - /* setup file selection */ - h5start[0] = elmts_begin + nelmts_xfer; - h5stride[0] = h5block[0] = nelmts_toxfer; - h5count[0] = 1; - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /*setup the memory selection. Only start is different */ - h5start[0] = 0; - hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - } /* Contiguous pattern */ - else { - /* Interleaved access pattern */ - /* setup file selection */ - /* Select one block, stride over nproc*block. */ - /* Repeat what the xfer buffer can hold */ - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_write/PHDF5): " - "elmts_begin=%Hd, nelmts_xfer=%Hd" - ", nelmts_in_blk=%Hd, nelmts_in_buf=%Hd" - ", pio_mpi_nprocs_g=%Hd" - "\n", - (long_long)elmts_begin, (long_long)nelmts_xfer, - (long_long)nelmts_in_blk, (long_long)nelmts_in_buf, - (long_long)pio_mpi_nprocs_g); - } - h5start[0] = elmts_begin + nelmts_xfer*pio_mpi_nprocs_g; - h5block[0] = nelmts_in_blk; - h5stride[0] = nelmts_in_blk*pio_mpi_nprocs_g; - /* this requires we always use full xfer buffer */ - h5count[0] = nelmts_in_buf/nelmts_in_blk; - - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /* Setup the memory selection. */ - /* Only start and stride are different. */ - /* Could just use H5S_ALL. */ - h5start[0] = 0; - h5stride[0] = nelmts_in_blk; - - hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - } - - - /* set write time here */ + /* Set up the file dset space id to move the selection to process */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); VRFY((hrc >= 0), "H5Dwrite"); + + /* Increment number of bytes transferred */ + nbytes_xfer += buf_size; + break; } /* switch (parms->io_type) */ - - /* Increment number of elements transferred */ - nelmts_xfer += nelmts_toxfer; - } + } /* end while */ /* Stop "raw data" write timer */ set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, STOP); @@ -884,7 +899,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, /* Calculate write time */ /* Close dataset. Only HDF5 needs to do an explicit close. */ - if (parms->io_type == PHDF5){ + if (parms->io_type == PHDF5) { hrc = H5Dclose(h5ds_id); if (hrc < 0) { @@ -893,10 +908,21 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, } h5ds_id = -1; - } - } + } /* end if */ + } /* end for */ done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* Free file type */ + mrc = MPI_Type_free( &mpi_file_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free( &mpi_blk_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* release HDF5 objects */ if (h5dset_space_id != -1) { hrc = H5Sclose(h5dset_space_id); @@ -929,7 +955,7 @@ done: } return ret_code; -} /* do_write */ +} /* * Function: do_read @@ -940,83 +966,176 @@ done: */ static herr_t do_read(results *res, file_descr *fd, parameters *parms, long ndsets, - off_t nelmts, size_t blk_size, size_t buf_size, void *buffer /*out*/) + off_t nbytes, size_t buf_size, void *buffer) { int ret_code = SUCCESS; int rc; /*routine return code */ - int mrc; /*MPI return code */ - MPI_Offset mpi_offset; - MPI_Status mpi_status; long ndset; - off_t nelmts_xfer; - size_t nelmts_toxfer; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ char dname[64]; - off_t dset_offset=0; /*dataset offset in a file */ - off_t file_offset; /*file offset of the next transfer */ - off_t dset_size; /*one dataset size in bytes */ - size_t nelmts_in_buf; /*how many element the buffer holds */ - size_t nelmts_in_blk=0; /*how many element a block holds */ - off_t elmts_begin; /*first elmt this process transfer */ - off_t elmts_count; /*number of elmts this process transfer */ + off_t dset_offset=0; /*dataset offset in a file */ + off_t bytes_begin; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* Ffile offset of the next transfer */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset;/* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Datatype mpi_file_type; /* MPI derived type for file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for buffer */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ /* HDF5 variables */ - herr_t hrc; /*HDF5 return code */ - hsize_t h5dims[1]; /*dataset dim sizes */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[1]; /*dataset dim sizes */ hid_t h5dset_space_id = -1; /*dataset space ID */ hid_t h5mem_space_id = -1; /*memory dataspace ID */ - hid_t h5ds_id = -1; /*dataset handle */ + hid_t h5ds_id = -1; /*dataset handle */ hsize_t h5block[1]; /*dataspace selection */ hsize_t h5stride[1]; hsize_t h5count[1]; hssize_t h5start[1]; + hssize_t h5offset[1]; /* Selection offset within dataspace */ hid_t h5dxpl = -1; /* Dataset transfer property list */ - /* calculate dataset parameters. data type is always native C int */ - dset_size = nelmts * (off_t)ELMT_SIZE; - nelmts_in_buf = buf_size/ELMT_SIZE; - - /* hdf5 data space setup */ - if (parms->io_type == PHDF5){ - if(nelmts>0) { - /* define a contiquous dataset of nelmts native ints */ - h5dims[0] = nelmts; - h5dset_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5dset_space_id >= 0), "H5Screate_simple"); - } /* end if */ - else { - h5dset_space_id = H5Screate(H5S_SCALAR); - VRFY((h5dset_space_id >= 0), "H5Screate"); - } /* end else */ - - /* Create the memory dataspace that corresponds to the xfer buffer */ - if(nelmts_in_buf>0) { - h5dims[0] = nelmts_in_buf; - h5mem_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5mem_space_id >= 0), "H5Screate_simple"); - } /* end if */ - else { - h5mem_space_id = H5Screate(H5S_SCALAR); - VRFY((h5mem_space_id >= 0), "H5Screate"); - } /* end else */ - - /* Create the dataset transfer property list */ - h5dxpl = H5Pcreate(H5P_DATASET_XFER); - if (h5dxpl < 0) { - fprintf(stderr, "HDF5 Property List Create failed\n"); - GOTOERROR(FAIL); - } + /* Get the parameters from the parameter block */ + blk_size=parms->blk_size; + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + if (parms->interleaved==0) { + /* Contiguous Pattern: */ + bytes_begin = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g); + } /* end if */ + else { + /* Interleaved Pattern: */ + bytes_begin = (off_t)(blk_size*pio_mpi_rank_g); + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g) + - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + HDfprintf(output, "Debug(do_read): " + "buf_size=%Hd, bytes_begin=%Hd, bytes_count=%Hd\n", + (long_long)buf_size, (long_long)bytes_begin, + (long_long)bytes_count); + } - /* Change to collective I/O, if asked */ - if(parms->collective) { - hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); - if (hrc < 0) { - fprintf(stderr, "HDF5 Property List Set failed\n"); + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, + MPI_BYTE, &mpi_blk_type); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1, + (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit( &mpi_file_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit( &mpi_blk_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + break; + + case PHDF5: /* HDF5 setup */ + if(nbytes>0) { + /* define a contiquous dataset of nbytes native bytes */ + h5dims[0] = nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5start[0] = bytes_begin; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size/blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = bytes_begin; + h5stride[0] = blk_size*pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size/blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, + h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if(buf_size>0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + fprintf(stderr, "HDF5 Property List Create failed\n"); GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if(parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ } /* end if */ - } /* end if */ - } /* end if */ + break; + } /* end switch */ for (ndset = 1; ndset <= ndsets; ++ndset) { + /* Calculate dataset offset within a file */ /* create dataset */ @@ -1024,7 +1143,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, case POSIXIO: case MPIO: /* both posix and mpi io just need dataset offset in file*/ - dset_offset = (ndset - 1) * dset_size; + dset_offset = (ndset - 1) * nbytes; break; case PHDF5: @@ -1038,272 +1157,219 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, break; } - /* There are two kinds of transfer patterns, contiguous and interleaved. - * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n - * where n is rank of the last process. - * In contiguous pattern, data are accessed as - * 000...111...222...nnn... - * In interleaved pattern, data are accessed as - * 012...n012...n... - * These are all in the scope of one dataset. - */ - /* Calculate the total number of elements (elmts_count) to be - * transferred by this process. It may be different for different - * transfer pattern due to rounding to integral values. - */ - if (blk_size==0){ - /* Contiguous Pattern: - * Calculate the beginning element of this process and the next. - * elmts_count is the difference between these two beginnings. - * This way, it eliminates any rounding errors. - */ - elmts_begin = (off_t)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g); - - /* Do not cast elmt_begin to other types, especially non-integral - * types, else it may introduce rounding discrepency. */ - if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1)) - elmts_count = (off_t)(((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) - - elmts_begin; - else - /* last process. Take whatever are left */ - elmts_count = nelmts - elmts_begin; - }else{ - /* Interleaved Pattern: - * Each process takes blk_size of elements, starting with the first - * process. So, the last process may have fewer or even none. - * Calculate the beginning element of this process and the next. - * The elmnts_begin here marks only the beginning of the first - * block accessed by this process. - */ - /* Algorithm: - * First allocate equal blocks per process, i.e. one block each - * process for every block_size*nprocs. - * If there is remaining unallocated, give a block each to process - * starting at proc 0. The last process may get a partial block. - */ - off_t remain_nelmts, remain_begin; /* unallocated remaining*/ - - nelmts_in_blk = blk_size/ELMT_SIZE; - elmts_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); - - /* must use integer calculation next */ - /* allocate equal blocks per process */ - elmts_count = (nelmts / (off_t)(nelmts_in_blk*pio_mpi_nprocs_g)) * - (off_t)nelmts_in_blk; - remain_nelmts = nelmts % ((off_t)(nelmts_in_blk*pio_mpi_nprocs_g)); - - /* allocate any remaining */ - remain_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); - if (remain_nelmts > remain_begin){ - /* it gets something */ - if (remain_nelmts > (remain_begin+(off_t)nelmts_in_blk)){ - /* one full block */ - elmts_count += nelmts_in_blk; - }else{ - /* only a partial block */ - elmts_count += remain_nelmts - remain_begin; - } - } - } - /* debug */ - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_read): " - "nelmts_in_blk=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", - (long_long)nelmts_in_blk, (long_long)elmts_begin, - (long_long)elmts_count); - } - - - /* The task is to transfer elmts_count elements, starting at - * elmts_begin position, using transfer buffer of buf_size bytes. - * If blk_size > 0, select blk_size at a time, in round robin + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin * fashion, according to number of process. Otherwise, select - * all elmt_count in contiguous. + * all bytes_count in contiguous. */ - nelmts_xfer = 0 ; + nbytes_xfer = 0 ; + + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin); /* Start "raw data" read timer */ set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, START); - while (nelmts_xfer < elmts_count){ - /* transfer one buffer of data each round */ - /* Note: because size_t is unsigned, avoid expressions that */ - /* can be negative. */ - if ((nelmts_xfer + (off_t)nelmts_in_buf) <= elmts_count) { - nelmts_toxfer = nelmts_in_buf; - } else { - /* last transfer of a partial buffer */ - nelmts_toxfer = elmts_count - nelmts_xfer; - } - - /* read */ + while (nbytes_xfer < bytes_count){ + /* Read */ /* Calculate offset of read within a dataset/file */ - switch (parms->io_type){ + switch (parms->io_type) { case POSIXIO: - if (blk_size==0){ - /* Contiguous pattern */ - /* need to (off_t) the elmnts_begin expression because they */ - /* may be of smaller sized integer types */ - file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == - POSIXREAD(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXREAD"); - }else{ - /* interleaved access pattern */ - char *buf_p=buffer; - size_t xferred=0; - size_t toxfer=0; - - file_offset = dset_offset + - (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_read): " - "nelmts_toxfer=%Hd, nelmts_xfer=%Hd\n", - (long_long)nelmts_toxfer, (long_long)nelmts_xfer); - } - while (xferred < nelmts_toxfer){ - if ((nelmts_toxfer - xferred) >= nelmts_in_blk) - toxfer = nelmts_in_blk; - else - toxfer = nelmts_toxfer - xferred; - /* Skip offset over blocks of other processes */ - file_offset = dset_offset + - (off_t)(elmts_begin + (nelmts_xfer+xferred)*pio_mpi_nprocs_g)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { - HDprint_rank(output); - HDfprintf(output, "Debug(do_read):" - "nelmts_toxfer=%Hd, nelmts_xfer=%Hd" - ", toxfer=%Hd, xferred=%Hd" - ", file_offset=%Hd" - "\n", - (long_long)nelmts_toxfer, (long_long)nelmts_xfer, - (long_long)toxfer, (long_long)xferred, - (long_long)file_offset); - } - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(toxfer*ELMT_SIZE) == - POSIXREAD(fd->posixfd, buf_p, toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXREAD"); - xferred += toxfer; - } - } - break; - - case MPIO: - if (blk_size==0) { - /* Contiguous pattern */ - mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; + /* Contiguous pattern */ + if (parms->interleaved==0) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)buf_size; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)buf_size == + POSIXREAD(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; } /* end if */ + /* Interleaved access pattern */ else { - /* Interleaved access pattern */ - /* Skip offset over blocks of other processes */ - mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + /* Set the base of user's buffer */ + buf_p=(unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while(nbytes_toxfer>0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + + (off_t)(nbytes_xfer*pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)blk_size == + POSIXREAD(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p+=blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer+=blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer-=blk_size; + } /* end while */ } /* end else */ + break; + case MPIO: + /* Independent file access */ if(parms->collective==0) { - mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_read"); + /* Contiguous pattern */ + if (parms->interleaved==0){ + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + nbytes_xfer; + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size/blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p=(unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while(nbytes_toxfer>0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + + (nbytes_xfer*pio_mpi_nprocs_g); + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p, + (int)1, mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_READ"); + + /* Advance location in buffer */ + buf_p+=blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer+=blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer-=blk_size; + } /* end while */ + } /* end else */ } /* end if */ + /* Collective file access */ else { - mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer, - (int)nelmts_toxfer, ELMT_MPI_TYPE, - &mpi_status); - VRFY((mrc==MPI_SUCCESS), "MPIO_read"); + /* Contiguous access pattern */ + if (parms->interleaved==0){ + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + nbytes_xfer; + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size/blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + + (nbytes_xfer*pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, + mpi_file_type, (char*)"native", h5_io_info_g); + VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, 0, buffer, + (int)(buf_size/blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc==MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer+=buf_size; + } /* end else */ } /* end else */ break; case PHDF5: - /*set up the dset space id to select the segment to process */ - if (blk_size==0) { - /* Contiguous pattern */ - /* setup file selection */ - h5start[0] = elmts_begin + nelmts_xfer; - h5stride[0] = h5block[0] = nelmts_toxfer; - h5count[0] = 1; - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /*setup the memory selection. Only start is different */ - h5start[0] = 0; - hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - } /* Contiguous pattern */ - else { - /* Interleaved access pattern */ - /* setup file selection */ - /* Select one block, stride over nproc*block. */ - /* Repeat what the xfer buffer can hold */ - h5start[0] = elmts_begin + nelmts_xfer*pio_mpi_nprocs_g; - h5block[0] = nelmts_in_blk; - h5stride[0] = nelmts_in_blk*pio_mpi_nprocs_g; - /* this requires we always use full xfer buffer */ - h5count[0] = nelmts_in_buf/nelmts_in_blk; - - hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); - - /* Setup the memory selection. */ - /* Only start and stride are different. */ - /* Could just use H5S_ALL. */ - h5start[0] = 0; - h5stride[0] = nelmts_in_blk; - - hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET, - h5start, h5stride, h5count, h5block); - VRFY((hrc >= 0), "H5Sset_hyperslab"); + /* Set up the file dset space id to move the selection to process */ + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); - /* set read time here */ + /* Read the buffer in */ hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, - h5dset_space_id, h5dxpl, buffer); + h5dset_space_id, h5dxpl, buffer); VRFY((hrc >= 0), "H5Dread"); + + /* Increment number of bytes transferred */ + nbytes_xfer += buf_size; + break; } /* switch (parms->io_type) */ + /* Verify raw data, if asked */ if (parms->verify) { - /*verify read data*/ - int *intptr = (int *)buffer; + /* Verify data read */ + unsigned char *ucharptr = (unsigned char *)buffer; size_t i; int nerror=0; - for (i = 0; i < nelmts_toxfer; ++i){ - if (*intptr++ != pio_mpi_rank_g){ + for (i = 0; i < buf_size; ++i){ + if (*ucharptr++ != pio_mpi_rank_g) { if (++nerror < 20){ /* report at most 20 errors */ HDprint_rank(output); HDfprintf(output, "read data error, expected (%Hd), " "got (%Hd)\n", (long_long)pio_mpi_rank_g, - (long_long)*(intptr-1)); - } - } - } + (long_long)*(ucharptr-1)); + } /* end if */ + } /* end if */ + } /* end for */ if (nerror >= 20) { HDprint_rank(output); HDfprintf(output, "..."); - HDfprintf(output, "total read data errors=%Hd\n", - (long_long)nerror); - } + HDfprintf(output, "total read data errors=%d\n", + nerror); + } /* end if */ } /* if (parms->verify) */ - /* Increment number of elements transferred */ - nelmts_xfer += nelmts_toxfer; - } + } /* end while */ /* Stop "raw data" read timer */ set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, STOP); @@ -1311,7 +1377,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, /* Calculate read time */ /* Close dataset. Only HDF5 needs to do an explicit close. */ - if (parms->io_type == PHDF5){ + if (parms->io_type == PHDF5) { hrc = H5Dclose(h5ds_id); if (hrc < 0) { @@ -1320,10 +1386,21 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, } h5ds_id = -1; - } - } + } /* end if */ + } /* end for */ done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* Free file type */ + mrc = MPI_Type_free( &mpi_file_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free( &mpi_blk_type ); + VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* release HDF5 objects */ if (h5dset_space_id != -1) { hrc = H5Sclose(h5dset_space_id); @@ -1356,7 +1433,7 @@ done: } return ret_code; -} /* do_read */ +} /* * Function: do_fopen diff --git a/perform/pio_perf.c b/perform/pio_perf.c index 4b23e18..cc86c83 100644 --- a/perform/pio_perf.c +++ b/perform/pio_perf.c @@ -117,9 +117,9 @@ static const char *progname = "h5perf"; * adding more, make sure that they don't clash with each other. */ #if 1 -static const char *s_opts = "ha:A:cCD:e:P:p:X:x:nd:F:i:Io:stT:w"; +static const char *s_opts = "ha:A:B:cCd:D:e:F:i:Ino:p:P:stT:wx:X:"; #else -static const char *s_opts = "ha:A:bcCD:e:P:p:X:x:nd:F:i:Io:stT:w"; +static const char *s_opts = "ha:A:bB:cCd:D:e:F:i:Ino:p:P:stT:wx:X:"; #endif /* 1 */ static struct long_options l_opts[] = { { "help", no_arg, 'h' }, @@ -139,6 +139,15 @@ static struct long_options l_opts[] = { { "bin", no_arg, 'b' }, { "bi", no_arg, 'b' }, #endif /* 0 */ + { "block-size", require_arg, 'B' }, + { "block-siz", require_arg, 'B' }, + { "block-si", require_arg, 'B' }, + { "block-s", require_arg, 'B' }, + { "block-", require_arg, 'B' }, + { "block", require_arg, 'B' }, + { "bloc", require_arg, 'B' }, + { "blo", require_arg, 'B' }, + { "bl", require_arg, 'B' }, { "chunk", no_arg, 'c' }, { "chun", no_arg, 'c' }, { "chu", no_arg, 'c' }, @@ -156,14 +165,6 @@ static struct long_options l_opts[] = { { "debu", require_arg, 'D' }, { "deb", require_arg, 'D' }, { "de", require_arg, 'D' }, - { "num-elements", require_arg, 'e' }, - { "num-element", require_arg, 'e' }, - { "num-elemen", require_arg, 'e' }, - { "num-eleme", require_arg, 'e' }, - { "num-elem", require_arg, 'e' }, - { "num-ele", require_arg, 'e' }, - { "num-el", require_arg, 'e' }, - { "num-e", require_arg, 'e' }, { "interleaved", require_arg, 'I' }, { "interleave", require_arg, 'I' }, { "interleav", require_arg, 'I' }, @@ -214,6 +215,12 @@ static struct long_options l_opts[] = { { "no-f", no_arg, 'n' }, { "no-", no_arg, 'n' }, { "no", no_arg, 'n' }, + { "num-bytes", require_arg, 'e' }, + { "num-byte", require_arg, 'e' }, + { "num-byt", require_arg, 'e' }, + { "num-by", require_arg, 'e' }, + { "num-b", require_arg, 'e' }, + { "num-b", require_arg, 'e' }, { "num-dsets", require_arg, 'd' }, { "num-dset", require_arg, 'd' }, { "num-dse", require_arg, 'd' }, @@ -261,14 +268,15 @@ static struct long_options l_opts[] = { struct options { long io_types; /* bitmask of which I/O types to test */ const char *output_file; /* file to print report to */ - off_t num_elmts; /* number of elements per proc per dset */ long num_dsets; /* number of datasets */ long num_files; /* number of files */ + size_t num_bpp; /* number of bytes per proc per dset */ int num_iters; /* number of iterations */ int max_num_procs; /* maximum number of processes to use */ int min_num_procs; /* minimum number of processes to use */ size_t max_xfer_size; /* maximum transfer buffer size */ size_t min_xfer_size; /* minimum transfer buffer size */ + size_t blk_size; /* Block size */ unsigned interleaved; /* Interleaved vs. contiguous blocks */ unsigned collective; /* Collective vs. independent I/O */ int print_times; /* print times as well as throughputs */ @@ -408,10 +416,10 @@ run_test_loop(struct options *opts) int num_procs; int doing_pio; /* if this process is doing PIO */ - parms.num_elmts = opts->num_elmts; parms.num_files = opts->num_files; parms.num_dsets = opts->num_dsets; parms.num_iters = opts->num_iters; + parms.blk_size = opts->blk_size; parms.interleaved = opts->interleaved; parms.collective = opts->collective; parms.h5_align = opts->h5_alignment; @@ -441,15 +449,16 @@ run_test_loop(struct options *opts) for (buf_size = opts->min_xfer_size; buf_size <= opts->max_xfer_size; buf_size <<= 1) { parms.buf_size = buf_size; + parms.num_bytes = (off_t)opts->num_bpp*parms.num_procs; print_indent(1); output_report("Transfer Buffer Size: %ld bytes, File size: %.2f MBs\n", buf_size, - ((double)parms.num_dsets * (double)parms.num_elmts * - (double)parms.num_procs * (double)sizeof(int)) / ONE_MB); + ((double)parms.num_dsets * (double)parms.num_bytes) + / ONE_MB); print_indent(1); - output_report(" # of files: %ld, # of dsets: %ld, # of elmts per dset: %ld\n", - parms.num_files, parms.num_dsets, parms.num_elmts); + output_report(" # of files: %ld, # of datasets: %ld, dataset size: %.2f MBs\n", + parms.num_files, parms.num_dsets, (double)parms.num_bytes/ONE_MB); if (opts->io_types & PIO_POSIX) run_test(POSIXIO, parms, opts); @@ -461,7 +470,7 @@ run_test_loop(struct options *opts) run_test(PHDF5, parms, opts); /* Run the tests once if buf_size==0, but then break out */ - if (buf_size == 0) + if(buf_size==0) break; } @@ -503,8 +512,7 @@ run_test(iotype iot, parameters parms, struct options *opts) minmax read_gross_mm = {0.0, 0.0, 0.0, 0}; minmax read_raw_mm = {0.0, 0.0, 0.0, 0}; - raw_size = (off_t)parms.num_procs * (off_t)parms.num_dsets * - (off_t)parms.num_elmts * (off_t)sizeof(int); + raw_size = (off_t)parms.num_dsets * (off_t)parms.num_bytes; parms.io_type = iot; print_indent(2); output_report("IO API = "); @@ -525,16 +533,16 @@ run_test(iotype iot, parameters parms, struct options *opts) /* allocate space for tables minmax and that it is sufficient */ /* to initialize all elements to zeros by calloc. */ - write_mpi_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - write_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - write_gross_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - write_raw_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_mpi_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + write_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + write_gross_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + write_raw_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); if (!parms.h5_write_only) { - read_mpi_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - read_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - read_gross_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); - read_raw_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_mpi_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + read_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + read_gross_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); + read_raw_mm_table = calloc((size_t)parms.num_iters , sizeof(minmax)); } /* Do IO iteration times, collecting statistics each time */ @@ -611,7 +619,7 @@ run_test(iotype iot, parameters parms, struct options *opts) output_all_info(write_raw_mm_table, parms.num_iters, 4); } - output_results(opts, "Raw Data Write", write_raw_mm_table, parms.num_iters, raw_size); + output_results(opts,"Raw Data Write",write_raw_mm_table,parms.num_iters,raw_size); } /* end if */ /* show mpi write statics */ @@ -632,7 +640,7 @@ run_test(iotype iot, parameters parms, struct options *opts) output_all_info(write_mm_table, parms.num_iters, 4); } - output_results(opts, "Write", write_mm_table, parms.num_iters, raw_size); + output_results(opts,"Write",write_mm_table,parms.num_iters,raw_size); /* accumulate and output the max, min, and average "gross write" times */ if (pio_debug_level >= 3) { @@ -642,7 +650,7 @@ run_test(iotype iot, parameters parms, struct options *opts) output_all_info(write_gross_mm_table, parms.num_iters, 4); } - output_results(opts, "Write Open-Close", write_gross_mm_table, parms.num_iters, raw_size); + output_results(opts,"Write Open-Close",write_gross_mm_table,parms.num_iters,raw_size); if (!parms.h5_write_only) { /* Read statistics */ @@ -877,7 +885,7 @@ destroy_comm_world(void) */ static void output_results(const struct options *opts, const char *name, minmax *table, - int table_size, off_t data_size) + int table_size,off_t data_size) { minmax total_mm; @@ -897,7 +905,7 @@ output_results(const struct options *opts, const char *name, minmax *table, print_indent(4); output_report("Average Throughput: %6.2f MB/s", - MB_PER_SEC(data_size, total_mm.sum / total_mm.num)); + MB_PER_SEC(data_size,total_mm.sum / total_mm.num)); if(opts->print_times) output_report(" (%7.3f s)\n", (total_mm.sum / total_mm.num)); else @@ -997,35 +1005,33 @@ report_parameters(struct options *opts) HDfprintf(output, "rank %d: IO API=", rank); print_io_api(opts->io_types); - HDfprintf(output, "rank %d: Number of elements per process per dataset=%Hd", - rank, (long_long)opts->num_elmts); + HDfprintf(output, "rank %d: Number of bytes per process per dataset=", rank); + recover_size_and_print((long_long)opts->num_bpp, "\n"); + HDfprintf(output, "rank %d: Number of files=%Hd\n", rank, - (long_long)opts->num_files); + (long_long)opts->num_files); HDfprintf(output, "rank %d: Number of datasets=%Hd\n", rank, - (long_long)opts->num_dsets); + (long_long)opts->num_dsets); HDfprintf(output, "rank %d: Number of iterations=%Hd\n", rank, - (long_long)opts->num_iters); + (long_long)opts->num_iters); HDfprintf(output, "rank %d: Number of processes=%d:%d\n", rank, - opts->min_num_procs, opts->max_num_procs); + opts->min_num_procs, opts->max_num_procs); HDfprintf(output, "rank %d: Size of dataset(s)=", rank); - recover_size_and_print((long_long)(opts->num_elmts * opts->min_num_procs), ":"); - recover_size_and_print((long_long)(opts->num_elmts * opts->max_num_procs), "\n"); + recover_size_and_print((long_long)(opts->num_bpp * opts->min_num_procs), ":"); + recover_size_and_print((long_long)(opts->num_bpp * opts->max_num_procs), "\n"); HDfprintf(output, "rank %d: File size=", rank); - recover_size_and_print((long_long)(opts->num_elmts * opts->min_num_procs - * opts->num_dsets * sizeof(int)), ":"); - recover_size_and_print((long_long)(opts->num_elmts * opts->max_num_procs - * opts->num_dsets * sizeof(int)), "\n"); - - /* - * dset size = elements per process * nprocs - * fsize = dsetsize * sizeof(int) * ndsets - */ + recover_size_and_print((long_long)(opts->num_bpp * opts->min_num_procs + * opts->num_dsets), ":"); + recover_size_and_print((long_long)(opts->num_bpp * opts->max_num_procs + * opts->num_dsets), "\n"); HDfprintf(output, "rank %d: Transfer buffer size=", rank); recover_size_and_print((long_long)opts->min_xfer_size, ":"); recover_size_and_print((long_long)opts->max_xfer_size, "\n"); + HDfprintf(output, "rank %d: Block size=", rank); + recover_size_and_print((long_long)opts->blk_size, "\n"); HDfprintf(output, "rank %d: Block Pattern in Dataset=", rank); if(opts->interleaved) @@ -1071,14 +1077,15 @@ parse_command_line(int argc, char *argv[]) cl_opts->output_file = NULL; cl_opts->io_types = 0; /* will set default after parsing options */ - cl_opts->num_elmts = 256 * ONE_KB; cl_opts->num_dsets = 1; cl_opts->num_files = 1; + cl_opts->num_bpp = 256 * ONE_KB; cl_opts->num_iters = 1; cl_opts->max_num_procs = comm_world_nprocs_g; cl_opts->min_num_procs = 1; cl_opts->max_xfer_size = 1 * ONE_MB; cl_opts->min_xfer_size = 128 * ONE_KB; + cl_opts->blk_size = 128 * ONE_KB; /* Default to writing 128K per block */ cl_opts->interleaved = 0; /* Default to contiguous blocks in dataset */ cl_opts->collective = 0; /* Default to independent I/O access */ cl_opts->print_times = FALSE; /* Printing times is off by default */ @@ -1134,6 +1141,9 @@ parse_command_line(int argc, char *argv[]) /* the future "binary" option */ break; #endif /* 0 */ + case 'B': + cl_opts->blk_size = parse_size_directive(opt_arg); + break; case 'c': /* Turn on chunked HDF5 dataset creation */ cl_opts->h5_use_chunks = TRUE; @@ -1203,7 +1213,7 @@ parse_command_line(int argc, char *argv[]) break; case 'e': - cl_opts->num_elmts = parse_size_directive(opt_arg); + cl_opts->num_bpp = parse_size_directive(opt_arg); break; case 'F': cl_opts->num_files = atoi(opt_arg); @@ -1333,13 +1343,14 @@ usage(const char *prog) #if 0 printf(" -b, --binary The elusive binary option\n"); #endif /* 0 */ + printf(" -B N, --block-size=N Block size within transfer buffer [default:128K]\n"); printf(" -c, --chunk Create HDF5 datasets chunked [default: off]\n"); printf(" -C, --collective Use collective I/O for MPI and HDF5 APIs\n"); printf(" [default: off (i.e. independent I/O)]\n"); printf(" -d N, --num-dsets=N Number of datasets per file [default:1]\n"); printf(" -D DL, --debug=DL Indicate the debugging level\n"); printf(" [default: no debugging]\n"); - printf(" -e S, --num-elements=S Number of elements per process per dataset\n"); + printf(" -e S, --num-bytes=S Number of bytes per process per dataset\n"); printf(" [default: 256K]\n"); printf(" -F N, --num-files=N Number of files [default: 1]\n"); printf(" -i, --num-iterations Number of iterations to perform [default: 1]\n"); @@ -1353,7 +1364,7 @@ usage(const char *prog) printf(" -o F, --output=F Output raw data into file F [default: none]\n"); printf(" -p N, --min-num-processes=N Minimum number of processes to use [default: 1]\n"); printf(" -P N, --max-num-processes=N Maximum number of processes to use\n"); - printf(" [default: all MPI_COMM_WORLD processes]\n"); + printf(" [default: all MPI_COMM_WORLD processes ]\n"); printf(" -T S, --threshold=S Threshold for alignment of objects in HDF5 file\n"); printf(" [default: 1]\n"); printf(" -w, --write-only Perform write tests not the read tests\n"); @@ -1361,13 +1372,13 @@ usage(const char *prog) printf(" -X S, --max-xfer-size=S Maximum transfer buffer size [default: 1M]\n"); printf("\n"); printf(" F - is a filename.\n"); - printf(" N - is an integer >= 0.\n"); - printf(" S - is a size specifier, an integer >= 0 followed by a size indicator:\n"); + printf(" N - is an integer >=0.\n"); + printf(" S - is a size specifier, an integer >=0 followed by a size indicator:\n"); printf(" K - Kilobyte (%d)\n", ONE_KB); printf(" M - Megabyte (%d)\n", ONE_MB); printf(" G - Gigabyte (%d)\n", ONE_GB); printf("\n"); - printf(" Example: 37M = 37 Megabytes = %d bytes\n", 37 * ONE_MB); + printf(" Example: 37M = 37 Megabytes = %d bytes\n", 37*ONE_MB); printf("\n"); printf(" AL - is an API list. Valid values are:\n"); printf(" phdf5 - Parallel HDF5\n"); diff --git a/perform/pio_perf.h b/perform/pio_perf.h index 46e51c4..a66aff0 100644 --- a/perform/pio_perf.h +++ b/perform/pio_perf.h @@ -36,10 +36,10 @@ typedef struct parameters_ { int num_procs; /* Maximum number of processes to use */ long num_files; /* Number of files to create */ long num_dsets; /* Number of datasets to create */ - off_t num_elmts; /* Number of native ints in each dset */ - int num_iters; /* Number of times to loop doing the IO */ + off_t num_bytes; /* Number of bytes in each dset */ + int num_iters; /* Number of times to loop doing the IO */ size_t buf_size; /* Buffer size */ - size_t block_size; /* interleaved block size */ + size_t blk_size; /* Block size */ unsigned interleaved; /* Interleaved vs. contiguous blocks */ unsigned collective; /* Collective vs. independent I/O */ hsize_t h5_align; /* HDF5 object alignment */ |