summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2002-07-02 20:06:22 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2002-07-02 20:06:22 (GMT)
commite858a1310df466b41c2e743915d8b57f999aa4ad (patch)
tree1d28450e4e59dc51510c3a8f7a357ba5616ad9c1
parent10be6297fc94ac0867cf4cd44abcc0225beac8e5 (diff)
downloadhdf5-e858a1310df466b41c2e743915d8b57f999aa4ad.zip
hdf5-e858a1310df466b41c2e743915d8b57f999aa4ad.tar.gz
hdf5-e858a1310df466b41c2e743915d8b57f999aa4ad.tar.bz2
[svn-r5760] Purpose:
New features, etc. Description: Bring over all the recent changes from the release branch. Platforms tested: IRIX64 6.5 (modi4) w/parallel
-rw-r--r--perform/pio_engine.c1180
-rw-r--r--perform/pio_perf.c127
-rw-r--r--perform/pio_perf.h3
3 files changed, 803 insertions, 507 deletions
diff --git a/perform/pio_engine.c b/perform/pio_engine.c
index a9499d3..126d88a 100644
--- a/perform/pio_engine.c
+++ b/perform/pio_engine.c
@@ -45,9 +45,9 @@
/* sizes of various items. these sizes won't change during program execution */
/* The following three must have the same type */
-#define ELMT_SIZE (sizeof(int)) /* we're doing ints */
-#define ELMT_MPI_TYPE MPI_INT
-#define ELMT_H5_TYPE H5T_NATIVE_INT
+#define ELMT_SIZE (sizeof(unsigned char)) /* we're doing bytes */
+#define ELMT_MPI_TYPE MPI_BYTE
+#define ELMT_H5_TYPE H5T_NATIVE_UCHAR
#define GOTOERROR(errcode) { ret_code = errcode; goto done; }
#define GOTODONE { goto done; }
@@ -130,6 +130,7 @@ static herr_t do_fclose(iotype iot, file_descr *fd);
static void do_cleanupfile(iotype iot, char *fname);
/* GPFS-specific functions */
+#ifdef H5_HAVE_GPFS
static void access_range(int handle, off_t start, off_t length, int is_write);
static void free_range(int handle, off_t start, off_t length);
static void clear_file_cache(int handle);
@@ -137,6 +138,7 @@ static void cancel_hints(int handle);
static void start_data_shipping(int handle, int num_insts);
static void stop_data_shipping(int handle);
static void invalidate_file_cache(const char *filename);
+#endif /* H5_HAVE_GPFS */
/*
* Function: do_pio
@@ -156,12 +158,12 @@ do_pio(parameters param)
iotype iot;
char fname[FILENAME_MAX];
- int maxprocs;
- long nfiles, nf;
+ long nf;
long ndsets;
- off_t nelmts;
+ off_t nbytes; /* Number of bytes per dataset */
char *buffer = NULL; /*data buffer pointer */
size_t buf_size; /*data buffer size in bytes */
+ size_t blk_size; /*data block size in bytes */
/* HDF5 variables */
herr_t hrc; /*HDF5 return code */
@@ -190,16 +192,15 @@ do_pio(parameters param)
GOTOERROR(FAIL);
}
- nfiles = param.num_files; /* number of files */
ndsets = param.num_dsets; /* number of datasets per file */
- nelmts = param.num_elmts; /* number of elements per dataset */
- maxprocs = param.num_procs; /* max number of mpi-processes to use */
+ nbytes = param.num_bytes; /* number of bytes per dataset */
buf_size = param.buf_size;
+ blk_size = param.blk_size;
- if (nfiles < 0 ) {
+ if (param.num_files < 0 ) {
fprintf(stderr,
"number of files must be >= 0 (%ld)\n",
- nfiles);
+ param.num_files);
GOTOERROR(FAIL);
}
@@ -210,26 +211,51 @@ do_pio(parameters param)
GOTOERROR(FAIL);
}
- if (maxprocs <= 0 ) {
+ if (param.num_procs <= 0 ) {
fprintf(stderr,
"maximum number of process to use must be > 0 (%d)\n",
- maxprocs);
+ param.num_procs);
GOTOERROR(FAIL);
}
- /* allocate transfer buffer */
+ /* Validate transfer buffer size & block size*/
+ if(blk_size<=0) {
+ HDfprintf(stderr,
+ "Transfer block size (%Hd) must be > 0\n", (long_long)blk_size);
+ GOTOERROR(FAIL);
+ }
if(buf_size<=0) {
HDfprintf(stderr,
"Transfer buffer size (%Hd) must be > 0\n", (long_long)buf_size);
GOTOERROR(FAIL);
- }else{
- buffer = malloc(buf_size);
+ }
+ if ((buf_size % blk_size) != 0){
+ HDfprintf(stderr,
+ "Transfer buffer size (%Hd) must be a multiple of the "
+ "interleaved I/O block size (%Hd)\n",
+ (long_long)buf_size, (long_long)blk_size);
+ GOTOERROR(FAIL);
+ }
+ if((nbytes%pio_mpi_nprocs_g)!=0) {
+ HDfprintf(stderr,
+ "Dataset size (%Hd) must be a multiple of the "
+ "number of processes (%d)\n",
+ (long_long)nbytes, pio_mpi_nprocs_g);
+ GOTOERROR(FAIL);
+ }
+ if(((nbytes/pio_mpi_nprocs_g)%buf_size)!=0) {
+ HDfprintf(stderr,
+ "Dataset size/process (%Hd) must be a multiple of the "
+ "trasfer buffer size (%Hd)\n",
+ (long_long)(nbytes/pio_mpi_nprocs_g), (long_long)buf_size);
+ GOTOERROR(FAIL);
+ }
- if (buffer == NULL){
- HDfprintf(stderr, "malloc for transfer buffer size (%Hd) failed\n",
- (long_long)buf_size);
- GOTOERROR(FAIL);
- }
+ /* Allocate transfer buffer */
+ if ((buffer = malloc(buf_size)) == NULL){
+ HDfprintf(stderr, "malloc for transfer buffer size (%Hd) failed\n",
+ (long_long)(buf_size));
+ GOTOERROR(FAIL);
}
if (pio_debug_level >= 4) {
@@ -242,25 +268,26 @@ do_pio(parameters param)
fprintf(output, "Timer details:\n");
}
- for (nf = 1; nf <= nfiles; nf++) {
+ for (nf = 1; nf <= param.num_files; nf++) {
/*
* Write performance measurement
*/
/* Open file for write */
char base_name[256];
- MPI_Barrier(pio_comm_g);
-
sprintf(base_name, "#pio_tmp_%lu", nf);
pio_create_filename(iot, base_name, fname, sizeof(fname));
+ /* Need barrier to make sure everyone starts at the same time */
+ MPI_Barrier(pio_comm_g);
+
set_time(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, START);
hrc = do_fopen(&param, fname, &fd, PIO_CREATE | PIO_WRITE);
VRFY((hrc == SUCCESS), "do_fopen failed");
set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, START);
- hrc = do_write(&res, &fd, &param, ndsets, nelmts, buf_size, buffer);
+ hrc = do_write(&res, &fd, &param, ndsets, nbytes, buf_size, buffer);
set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, STOP);
VRFY((hrc == SUCCESS), "do_write failed");
@@ -275,6 +302,10 @@ do_pio(parameters param)
/*
* Read performance measurement
*/
+ /* Need barrier to make sure everyone is done writing and has
+ * closed the file. Also to make sure everyone starts reading
+ * at the same time.
+ */
MPI_Barrier(pio_comm_g);
/* Open file for read */
@@ -284,7 +315,7 @@ do_pio(parameters param)
VRFY((hrc == SUCCESS), "do_fopen failed");
set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, START);
- hrc = do_read(&res, &fd, &param, ndsets, nelmts, buf_size, buffer);
+ hrc = do_read(&res, &fd, &param, ndsets, nbytes, buf_size, buffer);
set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, STOP);
VRFY((hrc == SUCCESS), "do_read failed");
@@ -295,6 +326,8 @@ do_pio(parameters param)
VRFY((hrc == SUCCESS), "do_fclose failed");
}
+ /* Need barrier to make sure everyone is done with the file */
+ /* before it may be removed by do_cleanupfile */
MPI_Barrier(pio_comm_g);
do_cleanupfile(iot, fname);
}
@@ -447,23 +480,31 @@ pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t si
*/
static herr_t
do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
- off_t nelmts, size_t buf_size, void *buffer)
+ off_t nbytes, size_t buf_size, void *buffer)
{
int ret_code = SUCCESS;
int rc; /*routine return code */
- int mrc; /*MPI return code */
- MPI_Offset mpi_offset;
- MPI_Status mpi_status;
long ndset;
- off_t nelmts_xfer;
- size_t nelmts_toxfer;
+ size_t blk_size; /* The block size to subdivide the xfer buffer into */
+ off_t nbytes_xfer; /* Total number of bytes transferred so far */
+ size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */
char dname[64];
- off_t dset_offset=0; /*dataset offset in a file */
- off_t file_offset; /*file offset of the next transfer */
- off_t dset_size; /*one dataset size in bytes */
- size_t nelmts_in_buf; /*how many element the buffer holds */
- off_t elmts_begin; /*first elmt this process transfer */
- off_t elmts_count; /*number of elmts this process transfer */
+ off_t dset_offset=0; /*dataset offset in a file */
+ off_t bytes_begin; /*first byte this process transfers */
+ off_t bytes_count; /*number of bytes this process transfers */
+ unsigned char *buf_p; /* Current buffer pointer */
+
+ /* POSIX variables */
+ off_t file_offset; /* File offset of the next transfer */
+ off_t posix_file_offset; /* Base file offset of the next transfer */
+
+ /* MPI variables */
+ MPI_Offset mpi_file_offset;/* Base file offset of the next transfer*/
+ MPI_Offset mpi_offset; /* Offset in MPI file */
+ MPI_Datatype mpi_file_type; /* MPI derived type for file */
+ MPI_Datatype mpi_blk_type; /* MPI derived type for buffer */
+ MPI_Status mpi_status;
+ int mrc; /* MPI return code */
/* HDF5 variables */
herr_t hrc; /*HDF5 return code */
@@ -471,57 +512,146 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
hid_t h5dset_space_id = -1; /*dataset space ID */
hid_t h5mem_space_id = -1; /*memory dataspace ID */
hid_t h5ds_id = -1; /*dataset handle */
- hsize_t h5block[1]; /*dataspace selection */
+ hsize_t h5block[1]; /*dataspace selection */
hsize_t h5stride[1];
hsize_t h5count[1];
hssize_t h5start[1];
+ hssize_t h5offset[1]; /* Selection offset within dataspace */
hid_t h5dcpl = -1; /* Dataset creation property list */
hid_t h5dxpl = -1; /* Dataset transfer property list */
- /* calculate dataset parameters. data type is always native C int */
- dset_size = nelmts * (off_t)ELMT_SIZE;
- nelmts_in_buf = buf_size/ELMT_SIZE;
-
- /* hdf5 data space setup */
- if (parms->io_type == PHDF5){
- if(nelmts>0) {
- /* define a contiquous dataset of nelmts native ints */
- h5dims[0] = nelmts;
- h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
- VRFY((h5dset_space_id >= 0), "H5Screate_simple");
- } /* end if */
- else {
- h5dset_space_id = H5Screate(H5S_SCALAR);
- VRFY((h5dset_space_id >= 0), "H5Screate");
- } /* end else */
-
- /* Create the memory dataspace that corresponds to the xfer buffer */
- if(nelmts_in_buf>0) {
- h5dims[0] = nelmts_in_buf;
- h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
- VRFY((h5mem_space_id >= 0), "H5Screate_simple");
- } /* end if */
- else {
- h5mem_space_id = H5Screate(H5S_SCALAR);
- VRFY((h5mem_space_id >= 0), "H5Screate");
- } /* end else */
-
- /* Create the dataset transfer property list */
- h5dxpl = H5Pcreate(H5P_DATASET_XFER);
- if (h5dxpl < 0) {
- fprintf(stderr, "HDF5 Property List Create failed\n");
- GOTOERROR(FAIL);
- }
+ /* Get the parameters from the parameter block */
+ blk_size=parms->blk_size;
+
+ /* Prepare buffer for verifying data */
+ if (parms->verify)
+ memset(buffer,pio_mpi_rank_g,buf_size);
+
+ /* There are two kinds of transfer patterns, contiguous and interleaved.
+ * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
+ * where n is rank of the last process.
+ * In contiguous pattern, data are accessed as
+ * 000...111...222...nnn...
+ * In interleaved pattern, data are accessed as
+ * 012...n012...n...
+ * These are all in the scope of one dataset.
+ */
+ if (parms->interleaved==0) {
+ /* Contiguous Pattern: */
+ bytes_begin = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g);
+ } /* end if */
+ else {
+ /* Interleaved Pattern: */
+ bytes_begin = (off_t)(blk_size*pio_mpi_rank_g);
+ } /* end else */
+
+ /* Calculate the total number of bytes (bytes_count) to be
+ * transferred by this process. It may be different for different
+ * transfer pattern due to rounding to integral values.
+ */
+ /*
+ * Calculate the beginning bytes of this process and the next.
+ * bytes_count is the difference between these two beginnings.
+ * This way, it eliminates any rounding errors.
+ * (This is tricky, don't mess with the formula, rounding errors
+ * can easily get introduced) */
+ bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g)
+ - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g);
+
+ /* debug */
+ if (pio_debug_level >= 4) {
+ HDprint_rank(output);
+ HDfprintf(output, "Debug(do_write): "
+ "buf_size=%Hd, bytes_begin=%Hd, bytes_count=%Hd\n",
+ (long_long)buf_size, (long_long)bytes_begin,
+ (long_long)bytes_count);
+ }
- /* Change to collective I/O, if asked */
- if(parms->collective) {
- hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
- if (hrc < 0) {
- fprintf(stderr, "HDF5 Property List Set failed\n");
+ /* I/O Access specific setup */
+ switch (parms->io_type) {
+ case POSIXIO:
+ /* No extra setup */
+ break;
+
+ case MPIO: /* MPI-I/O setup */
+ /* Build block's derived type */
+ mrc = MPI_Type_contiguous((int)blk_size,
+ MPI_BYTE, &mpi_blk_type);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
+
+ /* Build file's derived type */
+ mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1,
+ (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
+
+ /* Commit file type */
+ mrc = MPI_Type_commit( &mpi_file_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
+
+ /* Commit buffer type */
+ mrc = MPI_Type_commit( &mpi_blk_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
+ break;
+
+ case PHDF5: /* HDF5 setup */
+ if(nbytes>0) {
+ /* define a contiguous dataset of nbytes native bytes */
+ h5dims[0] = nbytes;
+ h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
+ VRFY((h5dset_space_id >= 0), "H5Screate_simple");
+
+ /* Set up the file dset space id to select the pattern to access */
+ if (parms->interleaved==0){
+ /* Contiguous pattern */
+ h5start[0] = bytes_begin;
+ h5stride[0] = h5block[0] = blk_size;
+ h5count[0] = buf_size/blk_size;
+ } /* end if */
+ else {
+ /* Interleaved access pattern */
+ /* Skip offset over blocks of other processes */
+ h5start[0] = bytes_begin;
+ h5stride[0] = blk_size*pio_mpi_nprocs_g;
+ h5block[0] = blk_size;
+ h5count[0] = buf_size/blk_size;
+ } /* end else */
+ hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
+ h5start, h5stride, h5count, h5block);
+ VRFY((hrc >= 0), "H5Sselect_hyperslab");
+ } /* end if */
+ else {
+ h5dset_space_id = H5Screate(H5S_SCALAR);
+ VRFY((h5dset_space_id >= 0), "H5Screate");
+ } /* end else */
+
+ /* Create the memory dataspace that corresponds to the xfer buffer */
+ if(buf_size>0) {
+ h5dims[0] = buf_size;
+ h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
+ VRFY((h5mem_space_id >= 0), "H5Screate_simple");
+ } /* end if */
+ else {
+ h5mem_space_id = H5Screate(H5S_SCALAR);
+ VRFY((h5mem_space_id >= 0), "H5Screate");
+ } /* end else */
+
+ /* Create the dataset transfer property list */
+ h5dxpl = H5Pcreate(H5P_DATASET_XFER);
+ if (h5dxpl < 0) {
+ fprintf(stderr, "HDF5 Property List Create failed\n");
GOTOERROR(FAIL);
+ }
+
+ /* Change to collective I/O, if asked */
+ if(parms->collective) {
+ hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
+ if (hrc < 0) {
+ fprintf(stderr, "HDF5 Property List Set failed\n");
+ GOTOERROR(FAIL);
+ } /* end if */
} /* end if */
- } /* end if */
- }
+ break;
+ } /* end switch */
for (ndset = 1; ndset <= ndsets; ++ndset) {
@@ -532,7 +662,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
case POSIXIO:
case MPIO:
/* both posix and mpi io just need dataset offset in file*/
- dset_offset = (ndset - 1) * dset_size;
+ dset_offset = (ndset - 1) * nbytes;
break;
case PHDF5:
@@ -545,7 +675,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
/* Make the dataset chunked if asked */
if(parms->h5_use_chunks) {
/* Set the chunk size to be the same as the buffer size */
- h5dims[0] = nelmts_in_buf;
+ h5dims[0] = buf_size;
hrc = H5Pset_chunk(h5dcpl, 1, h5dims);
if (hrc < 0) {
fprintf(stderr, "HDF5 Property List Set failed\n");
@@ -583,201 +713,191 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
break;
}
- /* There are two kinds of transfer patterns, contiguous and interleaved.
- * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
- * where n is rank of the last process.
- * In contiguous pattern, data are accessed as
- * 000...111...222...nnn...
- * In interleaved pattern, data are accessed as
- * 012...n012...n...
- * These are all in the scope of one dataset.
- */
- /* Calculate the total number of elements (elmts_count) to be
- * transferred by this process. It may be different for different
- * transfer pattern due to rounding to integral values.
- */
- if (parms->interleaved==0) {
- /* Contiguous Pattern:
- * Calculate the beginning element of this process and the next.
- * elmts_count is the difference between these two beginnings.
- * This way, it eliminates any rounding errors.
- */
- elmts_begin = (off_t)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g);
-
- /* Do not cast elmt_begin to other types, especially non-integral
- * types, else it may introduce rounding discrepency. */
- if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1))
- elmts_count = (off_t)(((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1))
- - elmts_begin;
- else
- /* last process. Take whatever are left */
- elmts_count = nelmts - elmts_begin;
- } /* end if */
- else {
- /* Interleaved Pattern:
- * Each process takes buf_size of elements, starting with the first
- * process. So, the last process may have fewer or even none.
- * Calculate the beginning element of this process.
- * The elmnts_begin here marks only the beginning of the first
- * block accessed by this process.
- */
- /* Algorithm:
- * First allocate equal blocks per process, i.e. one block each
- * process for every block_size*nprocs.
- * If there is remaining unallocated, give a block each to process
- * starting at proc 0. The last process may get a partial block.
- */
- off_t remain_nelmts, remain_begin; /* unallocated remaining*/
-
- elmts_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g);
-
- /* must use integer calculation next */
- /* allocate equal blocks per process */
- elmts_count = (nelmts / (off_t)(nelmts_in_buf*pio_mpi_nprocs_g)) *
- (off_t)nelmts_in_buf;
- remain_nelmts = nelmts % (off_t)(nelmts_in_buf*pio_mpi_nprocs_g);
-
- /* allocate any remaining */
- remain_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g);
- if (remain_nelmts > remain_begin){
- /* it gets something */
- if (remain_nelmts > (remain_begin+(off_t)nelmts_in_buf)){
- /* one full block */
- elmts_count += nelmts_in_buf;
- }else{
- /* only a partial block */
- elmts_count += remain_nelmts - remain_begin;
- }
- }
- }
- /* debug */
- if (pio_debug_level >= 4) {
- HDprint_rank(output);
- HDfprintf(output, "Debug(do_write): "
- "nelmts_in_buf=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n",
- (long_long)nelmts_in_buf, (long_long)elmts_begin,
- (long_long)elmts_count);
- }
-
-
- /* The task is to transfer elmts_count elements, starting at
- * elmts_begin position, using transfer buffer of buf_size bytes.
+ /* The task is to transfer bytes_count bytes, starting at
+ * bytes_begin position, using transfer buffer of buf_size bytes.
* If interleaved, select buf_size at a time, in round robin
* fashion, according to number of process. Otherwise, select
- * all elmt_count in contiguous.
+ * all bytes_count in contiguous.
*/
- nelmts_xfer = 0 ;
-
- /* Start "raw data" write timer */
- set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, START);
+ nbytes_xfer = 0 ;
- while (nelmts_xfer < elmts_count){
- /* transfer one buffer of data each round */
- /* Note: because size_t is unsigned, avoid expressions that */
- /* can be negative. */
- if ((nelmts_xfer + (off_t)nelmts_in_buf) <= elmts_count) {
- nelmts_toxfer = nelmts_in_buf;
- } else {
- /* last transfer of a partial buffer */
- nelmts_toxfer = elmts_count - nelmts_xfer;
- }
+ /* Set base file offset for all I/O patterns and POSIX access */
+ posix_file_offset = dset_offset + bytes_begin;
- if (parms->verify) {
- /*Prepare write data for verify later*/
- int *intptr = (int *)buffer;
- size_t i;
+ /* Set base file offset for all I/O patterns and MPI access */
+ mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin);
- for (i = 0; i < nelmts_toxfer; ++i)
- *intptr++ = pio_mpi_rank_g;
- }
+ /* Start "raw data" write timer */
+ set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, START);
+ while (nbytes_xfer < bytes_count){
/* Write */
/* Calculate offset of write within a dataset/file */
switch (parms->io_type) {
case POSIXIO:
- if (parms->interleaved==0) {
- /* Contiguous pattern */
- /* need to (off_t) the elmnts_begin expression because they */
- /* may be of smaller sized integer types */
- file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE;
- } /* end if */
- else {
- /* Interleaved access pattern */
- /* Skip offset over blocks of other processes */
- file_offset = dset_offset +
- (off_t)(elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE;
- } /* end else */
-
- /* only care if seek returns error */
- rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
- VRFY((rc==0), "POSIXSEEK");
- /* check if all bytes are transferred */
- rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) ==
- POSIXWRITE(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE));
- VRFY((rc != 0), "POSIXWRITE");
- break;
-
- case MPIO:
- if (parms->interleaved==0){
- /* Contiguous pattern */
- mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE;
+ /* Contiguous pattern */
+ if (parms->interleaved==0) {
+ /* Compute file offset */
+ file_offset = posix_file_offset + (off_t)nbytes_xfer;
+
+ /* only care if seek returns error */
+ rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
+ VRFY((rc==0), "POSIXSEEK");
+
+ /* check if all bytes are written */
+ rc = ((ssize_t)buf_size ==
+ POSIXWRITE(fd->posixfd, buffer, buf_size));
+ VRFY((rc != 0), "POSIXWRITE");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
} /* end if */
+ /* Interleaved access pattern */
else {
- /* Interleaved access pattern */
- /* Skip offset over blocks of other processes */
- mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE;
+ /* Set the base of user's buffer */
+ buf_p=(unsigned char *)buffer;
+
+ /* Set the number of bytes to transfer this time */
+ nbytes_toxfer = buf_size;
+
+ /* Loop over the buffers to write */
+ while(nbytes_toxfer>0) {
+ /* Skip offset over blocks of other processes */
+ file_offset = posix_file_offset +
+ (off_t)(nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* only care if seek returns error */
+ rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
+ VRFY((rc==0), "POSIXSEEK");
+
+ /* check if all bytes are written */
+ rc = ((ssize_t)blk_size ==
+ POSIXWRITE(fd->posixfd, buf_p, blk_size));
+ VRFY((rc != 0), "POSIXWRITE");
+
+ /* Advance location in buffer */
+ buf_p+=blk_size;
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=blk_size;
+
+ /* Decrement number of bytes left this time */
+ nbytes_toxfer-=blk_size;
+ } /* end while */
} /* end else */
+ break;
+ case MPIO:
+ /* Independent file access */
if(parms->collective==0) {
- mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer,
- (int)nelmts_toxfer, ELMT_MPI_TYPE,
- &mpi_status);
- VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+ /* Contiguous pattern */
+ if (parms->interleaved==0){
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ nbytes_xfer;
+
+ /* Perform independent write */
+ mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type,
+ &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end if */
+ /* Interleaved access pattern */
+ else {
+ /* Set the base of user's buffer */
+ buf_p=(unsigned char *)buffer;
+
+ /* Set the number of bytes to transfer this time */
+ nbytes_toxfer = buf_size;
+
+ /* Loop over the buffers to write */
+ while(nbytes_toxfer>0) {
+ /* Skip offset over blocks of other processes */
+ mpi_offset = mpi_file_offset +
+ (nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* Perform independent write */
+ mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p,
+ (int)1, mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+
+ /* Advance location in buffer */
+ buf_p+=blk_size;
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=blk_size;
+
+ /* Decrement number of bytes left this time */
+ nbytes_toxfer-=blk_size;
+ } /* end while */
+ } /* end else */
} /* end if */
+ /* Collective file access */
else {
- mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer,
- (int)nelmts_toxfer, ELMT_MPI_TYPE,
- &mpi_status);
- VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+ /* Contiguous access pattern */
+ if (parms->interleaved==0){
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ nbytes_xfer;
+
+ /* Perform collective write */
+ mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end if */
+ /* Interleaved access pattern */
+ else {
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ (nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* Set the file view */
+ mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type,
+ mpi_file_type, (char*)"native", h5_io_info_g);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
+
+ /* Perform write */
+ mrc = MPI_File_write_at_all(fd->mpifd, 0, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end else */
} /* end else */
break;
case PHDF5:
- /* Set up the file dset space id to select the segment to process */
+ /* Set up the file dset space id to move the selection to process */
if (parms->interleaved==0){
/* Contiguous pattern */
- h5start[0] = elmts_begin + nelmts_xfer;
+ h5offset[0] = nbytes_xfer;
} /* end if */
else {
/* Interleaved access pattern */
/* Skip offset over blocks of other processes */
- h5start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g);
+ h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g);
} /* end else */
- h5stride[0] = h5block[0] = nelmts_toxfer;
- h5count[0] = 1;
- hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
- h5start, h5stride, h5count, h5block);
- VRFY((hrc >= 0), "H5Sset_hyperslab");
-
- /* Only need selection in memory dataset if it is smaller than the whole buffer */
- if(nelmts_toxfer<nelmts_in_buf) {
- /* Setup the memory space id too. Only start is different */
- h5start[0] = 0;
- hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET,
- h5start, h5stride, h5count, h5block);
- VRFY((hrc >= 0), "H5Sset_hyperslab");
- } /* end if */
+ hrc = H5Soffset_simple(h5dset_space_id, h5offset);
+ VRFY((hrc >= 0), "H5Soffset_simple");
- /* set write time here */
+ /* Write the buffer out */
hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
h5dset_space_id, h5dxpl, buffer);
VRFY((hrc >= 0), "H5Dwrite");
+
+ /* Increment number of bytes transferred */
+ nbytes_xfer += buf_size;
+
break;
} /* switch (parms->io_type) */
-
- /* Increment number of elements transferred */
- nelmts_xfer += nelmts_toxfer;
- }
+ } /* end while */
/* Stop "raw data" write timer */
set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, STOP);
@@ -785,7 +905,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
/* Calculate write time */
/* Close dataset. Only HDF5 needs to do an explicit close. */
- if (parms->io_type == PHDF5){
+ if (parms->io_type == PHDF5) {
hrc = H5Dclose(h5ds_id);
if (hrc < 0) {
@@ -794,10 +914,21 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets,
}
h5ds_id = -1;
- }
- }
+ } /* end if */
+ } /* end for */
done:
+ /* release MPI-I/O objects */
+ if (parms->io_type == MPIO) {
+ /* Free file type */
+ mrc = MPI_Type_free( &mpi_file_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
+
+ /* Free buffer type */
+ mrc = MPI_Type_free( &mpi_blk_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
+ } /* end if */
+
/* release HDF5 objects */
if (h5dset_space_id != -1) {
hrc = H5Sclose(h5dset_space_id);
@@ -841,82 +972,176 @@ done:
*/
static herr_t
do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
- off_t nelmts, size_t buf_size, void *buffer /*out*/)
+ off_t nbytes, size_t buf_size, void *buffer /*out*/)
{
int ret_code = SUCCESS;
int rc; /*routine return code */
- int mrc; /*MPI return code */
- MPI_Offset mpi_offset;
- MPI_Status mpi_status;
long ndset;
- off_t nelmts_xfer;
- size_t nelmts_toxfer;
+ size_t blk_size; /* The block size to subdivide the xfer buffer into */
+ off_t nbytes_xfer; /* Total number of bytes transferred so far */
+ size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */
char dname[64];
- off_t dset_offset=0; /*dataset offset in a file */
- off_t file_offset; /*file offset of the next transfer */
- off_t dset_size; /*one dataset size in bytes */
- size_t nelmts_in_buf; /*how many element the buffer holds */
- off_t elmts_begin; /*first elmt this process transfer */
- off_t elmts_count; /*number of elmts this process transfer */
+ off_t dset_offset=0; /*dataset offset in a file */
+ off_t bytes_begin; /*first byte this process transfers */
+ off_t bytes_count; /*number of bytes this process transfers */
+ unsigned char *buf_p; /* Current buffer pointer */
+
+ /* POSIX variables */
+ off_t file_offset; /* File offset of the next transfer */
+ off_t posix_file_offset; /* Base file offset of the next transfer */
+
+ /* MPI variables */
+ MPI_Offset mpi_file_offset;/* Base file offset of the next transfer*/
+ MPI_Offset mpi_offset; /* Offset in MPI file */
+ MPI_Datatype mpi_file_type; /* MPI derived type for file */
+ MPI_Datatype mpi_blk_type; /* MPI derived type for buffer */
+ MPI_Status mpi_status;
+ int mrc; /* MPI return code */
/* HDF5 variables */
- herr_t hrc; /*HDF5 return code */
- hsize_t h5dims[1]; /*dataset dim sizes */
+ herr_t hrc; /*HDF5 return code */
+ hsize_t h5dims[1]; /*dataset dim sizes */
hid_t h5dset_space_id = -1; /*dataset space ID */
hid_t h5mem_space_id = -1; /*memory dataspace ID */
- hid_t h5ds_id = -1; /*dataset handle */
- hsize_t h5block[1]; /*dataspace selection */
+ hid_t h5ds_id = -1; /*dataset handle */
+ hsize_t h5block[1]; /*dataspace selection */
hsize_t h5stride[1];
hsize_t h5count[1];
hssize_t h5start[1];
+ hssize_t h5offset[1]; /* Selection offset within dataspace */
hid_t h5dxpl = -1; /* Dataset transfer property list */
- /* calculate dataset parameters. data type is always native C int */
- dset_size = nelmts * (off_t)ELMT_SIZE;
- nelmts_in_buf = buf_size/ELMT_SIZE;
-
- /* hdf5 data space setup */
- if (parms->io_type == PHDF5){
- if(nelmts>0) {
- /* define a contiquous dataset of nelmts native ints */
- h5dims[0] = nelmts;
- h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
- VRFY((h5dset_space_id >= 0), "H5Screate_simple");
- } /* end if */
- else {
- h5dset_space_id = H5Screate(H5S_SCALAR);
- VRFY((h5dset_space_id >= 0), "H5Screate");
- } /* end else */
-
- /* Create the memory dataspace that corresponds to the xfer buffer */
- if(nelmts_in_buf>0) {
- h5dims[0] = nelmts_in_buf;
- h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
- VRFY((h5mem_space_id >= 0), "H5Screate_simple");
- } /* end if */
- else {
- h5mem_space_id = H5Screate(H5S_SCALAR);
- VRFY((h5mem_space_id >= 0), "H5Screate");
- } /* end else */
-
- /* Create the dataset transfer property list */
- h5dxpl = H5Pcreate(H5P_DATASET_XFER);
- if (h5dxpl < 0) {
- fprintf(stderr, "HDF5 Property List Create failed\n");
- GOTOERROR(FAIL);
- }
+ /* Get the parameters from the parameter block */
+ blk_size=parms->blk_size;
+
+ /* There are two kinds of transfer patterns, contiguous and interleaved.
+ * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
+ * where n is rank of the last process.
+ * In contiguous pattern, data are accessed as
+ * 000...111...222...nnn...
+ * In interleaved pattern, data are accessed as
+ * 012...n012...n...
+ * These are all in the scope of one dataset.
+ */
+ if (parms->interleaved==0) {
+ /* Contiguous Pattern: */
+ bytes_begin = (off_t)(((double)nbytes*pio_mpi_rank_g)/pio_mpi_nprocs_g);
+ } /* end if */
+ else {
+ /* Interleaved Pattern: */
+ bytes_begin = (off_t)(blk_size*pio_mpi_rank_g);
+ } /* end else */
+
+ /* Calculate the total number of bytes (bytes_count) to be
+ * transferred by this process. It may be different for different
+ * transfer pattern due to rounding to integral values.
+ */
+ /*
+ * Calculate the beginning bytes of this process and the next.
+ * bytes_count is the difference between these two beginnings.
+ * This way, it eliminates any rounding errors.
+ * (This is tricky, don't mess with the formula, rounding errors
+ * can easily get introduced) */
+ bytes_count = (off_t)(((double)nbytes*(pio_mpi_rank_g+1)) / pio_mpi_nprocs_g)
+ - (off_t)(((double)nbytes*pio_mpi_rank_g) / pio_mpi_nprocs_g);
+
+ /* debug */
+ if (pio_debug_level >= 4) {
+ HDprint_rank(output);
+ HDfprintf(output, "Debug(do_read): "
+ "buf_size=%Hd, bytes_begin=%Hd, bytes_count=%Hd\n",
+ (long_long)buf_size, (long_long)bytes_begin,
+ (long_long)bytes_count);
+ }
- /* Change to collective I/O, if asked */
- if(parms->collective) {
- hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
- if (hrc < 0) {
- fprintf(stderr, "HDF5 Property List Set failed\n");
+ /* I/O Access specific setup */
+ switch (parms->io_type) {
+ case POSIXIO:
+ /* No extra setup */
+ break;
+
+ case MPIO: /* MPI-I/O setup */
+ /* Build block's derived type */
+ mrc = MPI_Type_contiguous((int)blk_size,
+ MPI_BYTE, &mpi_blk_type);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
+
+ /* Build file's derived type */
+ mrc = MPI_Type_vector((int)(buf_size/blk_size), (int)1,
+ (int)pio_mpi_nprocs_g, mpi_blk_type, &mpi_file_type);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_CREATE");
+
+ /* Commit file type */
+ mrc = MPI_Type_commit( &mpi_file_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
+
+ /* Commit buffer type */
+ mrc = MPI_Type_commit( &mpi_blk_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_COMMIT");
+ break;
+
+ case PHDF5: /* HDF5 setup */
+ if(nbytes>0) {
+ /* define a contiguous dataset of nbytes native bytes */
+ h5dims[0] = nbytes;
+ h5dset_space_id = H5Screate_simple(1, h5dims, NULL);
+ VRFY((h5dset_space_id >= 0), "H5Screate_simple");
+
+ /* Set up the file dset space id to select the pattern to access */
+ if (parms->interleaved==0){
+ /* Contiguous pattern */
+ h5start[0] = bytes_begin;
+ h5stride[0] = h5block[0] = blk_size;
+ h5count[0] = buf_size/blk_size;
+ } /* end if */
+ else {
+ /* Interleaved access pattern */
+ /* Skip offset over blocks of other processes */
+ h5start[0] = bytes_begin;
+ h5stride[0] = blk_size*pio_mpi_nprocs_g;
+ h5block[0] = blk_size;
+ h5count[0] = buf_size/blk_size;
+ } /* end else */
+ hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
+ h5start, h5stride, h5count, h5block);
+ VRFY((hrc >= 0), "H5Sselect_hyperslab");
+ } /* end if */
+ else {
+ h5dset_space_id = H5Screate(H5S_SCALAR);
+ VRFY((h5dset_space_id >= 0), "H5Screate");
+ } /* end else */
+
+ /* Create the memory dataspace that corresponds to the xfer buffer */
+ if(buf_size>0) {
+ h5dims[0] = buf_size;
+ h5mem_space_id = H5Screate_simple(1, h5dims, NULL);
+ VRFY((h5mem_space_id >= 0), "H5Screate_simple");
+ } /* end if */
+ else {
+ h5mem_space_id = H5Screate(H5S_SCALAR);
+ VRFY((h5mem_space_id >= 0), "H5Screate");
+ } /* end else */
+
+ /* Create the dataset transfer property list */
+ h5dxpl = H5Pcreate(H5P_DATASET_XFER);
+ if (h5dxpl < 0) {
+ fprintf(stderr, "HDF5 Property List Create failed\n");
GOTOERROR(FAIL);
+ }
+
+ /* Change to collective I/O, if asked */
+ if(parms->collective) {
+ hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE);
+ if (hrc < 0) {
+ fprintf(stderr, "HDF5 Property List Set failed\n");
+ GOTOERROR(FAIL);
+ } /* end if */
} /* end if */
- } /* end if */
- } /* end if */
+ break;
+ } /* end switch */
for (ndset = 1; ndset <= ndsets; ++ndset) {
+
/* Calculate dataset offset within a file */
/* create dataset */
@@ -924,7 +1149,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
case POSIXIO:
case MPIO:
/* both posix and mpi io just need dataset offset in file*/
- dset_offset = (ndset - 1) * dset_size;
+ dset_offset = (ndset - 1) * nbytes;
break;
case PHDF5:
@@ -938,219 +1163,219 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
break;
}
- /* There are two kinds of transfer patterns, contiguous and interleaved.
- * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n
- * where n is rank of the last process.
- * In contiguous pattern, data are accessed as
- * 000...111...222...nnn...
- * In interleaved pattern, data are accessed as
- * 012...n012...n...
- * These are all in the scope of one dataset.
- */
- /* Calculate the total number of elements (elmts_count) to be
- * transferred by this process. It may be different for different
- * transfer pattern due to rounding to integral values.
- */
- if (parms->interleaved==0){
- /* Contiguous Pattern:
- * Calculate the beginning element of this process and the next.
- * elmts_count is the difference between these two beginnings.
- * This way, it eliminates any rounding errors.
- */
- elmts_begin = (off_t)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g);
-
- /* Do not cast elmt_begin to other types, especially non-integral
- * types, else it may introduce rounding discrepency. */
- if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1))
- elmts_count = (off_t)(((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1))
- - elmts_begin;
- else
- /* last process. Take whatever are left */
- elmts_count = nelmts - elmts_begin;
- } /* end if */
- else {
- /* Interleaved Pattern:
- * Each process takes buf_size of elements, starting with the first
- * process. So, the last process may have fewer or even none.
- * Calculate the beginning element of this process.
- * The elmnts_begin here marks only the beginning of the first
- * block accessed by this process.
- */
- /* Algorithm:
- * First allocate equal blocks per process, i.e. one block each
- * process for every block_size*nprocs.
- * If there is remaining unallocated, give a block each to process
- * starting at proc 0. The last process may get a partial block.
- */
- off_t remain_nelmts, remain_begin; /* unallocated remaining*/
-
- elmts_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g);
-
- /* must use integer calculation next */
- /* allocate equal blocks per process */
- elmts_count = (nelmts / (off_t)(nelmts_in_buf*pio_mpi_nprocs_g)) *
- (off_t)nelmts_in_buf;
- remain_nelmts = nelmts % ((off_t)(nelmts_in_buf*pio_mpi_nprocs_g));
-
- /* allocate any remaining */
- remain_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g);
- if (remain_nelmts > remain_begin) {
- /* it gets something */
- if (remain_nelmts > (remain_begin+(off_t)nelmts_in_buf)) {
- /* one full block */
- elmts_count += nelmts_in_buf;
- } /* end if */
- else {
- /* only a partial block */
- elmts_count += remain_nelmts - remain_begin;
- } /* end else */
- } /* end if */
- } /* end else */
- /* debug */
- if (pio_debug_level >= 4) {
- HDprint_rank(output);
- HDfprintf(output, "Debug(do_read): "
- "nelmts_in_buf=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n",
- (long_long)nelmts_in_buf, (long_long)elmts_begin,
- (long_long)elmts_count);
- }
-
-
- /* The task is to transfer elmts_count elements, starting at
- * elmts_begin position, using transfer buffer of buf_size bytes.
+ /* The task is to transfer bytes_count bytes, starting at
+ * bytes_begin position, using transfer buffer of buf_size bytes.
* If interleaved, select buf_size at a time, in round robin
* fashion, according to number of process. Otherwise, select
- * all elmt_count in contiguous.
+ * all bytes_count in contiguous.
*/
- nelmts_xfer = 0 ;
+ nbytes_xfer = 0 ;
+
+ /* Set base file offset for all I/O patterns and POSIX access */
+ posix_file_offset = dset_offset + bytes_begin;
+
+ /* Set base file offset for all I/O patterns and MPI access */
+ mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin);
/* Start "raw data" read timer */
set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, START);
- while (nelmts_xfer < elmts_count){
- /* transfer one buffer of data each round */
- /* Note: because size_t is unsigned, avoid expressions that */
- /* can be negative. */
- if ((nelmts_xfer + (off_t)nelmts_in_buf) <= elmts_count) {
- nelmts_toxfer = nelmts_in_buf;
- } else {
- /* last transfer of a partial buffer */
- nelmts_toxfer = elmts_count - nelmts_xfer;
- }
-
- /* read */
+ while (nbytes_xfer < bytes_count){
+ /* Read */
/* Calculate offset of read within a dataset/file */
- switch (parms->io_type){
+ switch (parms->io_type) {
case POSIXIO:
- if (parms->interleaved==0){
- /* Contiguous pattern */
- /* need to (off_t) the elmnts_begin expression because they */
- /* may be of smaller sized integer types */
- file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE;
- } /* end if */
- else {
- /* Interleaved access pattern */
- /* Skip offset over blocks of other processes */
- file_offset = dset_offset +
- (off_t)(elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE;
- } /* end else */
-
- /* only care if seek returns error */
- rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
- VRFY((rc==0), "POSIXSEEK");
- /* check if all bytes are transferred */
- rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) ==
- POSIXREAD(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE));
- VRFY((rc != 0), "POSIXREAD");
- break;
-
- case MPIO:
- if (parms->interleaved==0){
- /* Contiguous pattern */
- mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE;
+ /* Contiguous pattern */
+ if (parms->interleaved==0) {
+ /* Compute file offset */
+ file_offset = posix_file_offset + (off_t)nbytes_xfer;
+
+ /* only care if seek returns error */
+ rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
+ VRFY((rc==0), "POSIXSEEK");
+
+ /* check if all bytes are read */
+ rc = ((ssize_t)buf_size ==
+ POSIXREAD(fd->posixfd, buffer, buf_size));
+ VRFY((rc != 0), "POSIXREAD");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
} /* end if */
+ /* Interleaved access pattern */
else {
- /* Interleaved access pattern */
- /* Skip offset over blocks of other processes */
- mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE;
+ /* Set the base of user's buffer */
+ buf_p=(unsigned char *)buffer;
+
+ /* Set the number of bytes to transfer this time */
+ nbytes_toxfer = buf_size;
+
+ /* Loop over the buffers to read */
+ while(nbytes_toxfer>0) {
+ /* Skip offset over blocks of other processes */
+ file_offset = posix_file_offset +
+ (off_t)(nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* only care if seek returns error */
+ rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0;
+ VRFY((rc==0), "POSIXSEEK");
+
+ /* check if all bytes are read */
+ rc = ((ssize_t)blk_size ==
+ POSIXREAD(fd->posixfd, buf_p, blk_size));
+ VRFY((rc != 0), "POSIXREAD");
+
+ /* Advance location in buffer */
+ buf_p+=blk_size;
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=blk_size;
+
+ /* Decrement number of bytes left this time */
+ nbytes_toxfer-=blk_size;
+ } /* end while */
} /* end else */
+ break;
+ case MPIO:
+ /* Independent file access */
if(parms->collective==0) {
- mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer,
- (int)nelmts_toxfer, ELMT_MPI_TYPE,
- &mpi_status);
- VRFY((mrc==MPI_SUCCESS), "MPIO_read");
+ /* Contiguous pattern */
+ if (parms->interleaved==0){
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ nbytes_xfer;
+
+ /* Perform independent read */
+ mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type,
+ &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end if */
+ /* Interleaved access pattern */
+ else {
+ /* Set the base of user's buffer */
+ buf_p=(unsigned char *)buffer;
+
+ /* Set the number of bytes to transfer this time */
+ nbytes_toxfer = buf_size;
+
+ /* Loop over the buffers to read */
+ while(nbytes_toxfer>0) {
+ /* Skip offset over blocks of other processes */
+ mpi_offset = mpi_file_offset +
+ (nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* Perform independent read */
+ mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p,
+ (int)1, mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
+
+ /* Advance location in buffer */
+ buf_p+=blk_size;
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=blk_size;
+
+ /* Decrement number of bytes left this time */
+ nbytes_toxfer-=blk_size;
+ } /* end while */
+ } /* end else */
} /* end if */
+ /* Collective file access */
else {
- mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer,
- (int)nelmts_toxfer, ELMT_MPI_TYPE,
- &mpi_status);
- VRFY((mrc==MPI_SUCCESS), "MPIO_read");
+ /* Contiguous access pattern */
+ if (parms->interleaved==0){
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ nbytes_xfer;
+
+ /* Perform collective read */
+ mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end if */
+ /* Interleaved access pattern */
+ else {
+ /* Compute offset in file */
+ mpi_offset = mpi_file_offset +
+ (nbytes_xfer*pio_mpi_nprocs_g);
+
+ /* Set the file view */
+ mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type,
+ mpi_file_type, (char*)"native", h5_io_info_g);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_VIEW");
+
+ /* Perform collective read */
+ mrc = MPI_File_read_at_all(fd->mpifd, 0, buffer,
+ (int)(buf_size/blk_size), mpi_blk_type, &mpi_status);
+ VRFY((mrc==MPI_SUCCESS), "MPIO_READ");
+
+ /* Advance global offset in dataset */
+ nbytes_xfer+=buf_size;
+ } /* end else */
} /* end else */
break;
case PHDF5:
- /* Set up the dset space id to select the segment to process */
+ /* Set up the file dset space id to move the selection to process */
if (parms->interleaved==0){
/* Contiguous pattern */
- h5start[0] = elmts_begin + nelmts_xfer;
+ h5offset[0] = nbytes_xfer;
} /* end if */
else {
/* Interleaved access pattern */
/* Skip offset over blocks of other processes */
- h5start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g);
+ h5offset[0] = (nbytes_xfer*pio_mpi_nprocs_g);
} /* end else */
- h5stride[0] = h5block[0] = nelmts_toxfer;
- h5count[0] = 1;
- hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET,
- h5start, h5stride, h5count, h5block);
- VRFY((hrc >= 0), "H5Sset_hyperslab");
-
- /* Only need selection in memory dataset if it is smaller than the whole buffer */
- if(nelmts_toxfer<nelmts_in_buf) {
- /* Setup the memory space id too. Only start is different */
- h5start[0] = 0;
- hrc = H5Sselect_hyperslab(h5mem_space_id, H5S_SELECT_SET,
- h5start, h5stride, h5count, h5block);
- VRFY((hrc >= 0), "H5Sset_hyperslab");
- } /* end if */
+ hrc = H5Soffset_simple(h5dset_space_id, h5offset);
+ VRFY((hrc >= 0), "H5Soffset_simple");
- /* set read time here */
+ /* Read the buffer in */
hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id,
- h5dset_space_id, h5dxpl, buffer);
+ h5dset_space_id, h5dxpl, buffer);
VRFY((hrc >= 0), "H5Dread");
+
+ /* Increment number of bytes transferred */
+ nbytes_xfer += buf_size;
+
break;
} /* switch (parms->io_type) */
+ /* Verify raw data, if asked */
if (parms->verify) {
- /*verify read data*/
- int *intptr = (int *)buffer;
+ /* Verify data read */
+ unsigned char *ucharptr = (unsigned char *)buffer;
size_t i;
int nerror=0;
- for (i = 0; i < nelmts_toxfer; ++i){
- if (*intptr++ != pio_mpi_rank_g){
+ for (i = 0; i < buf_size; ++i){
+ if (*ucharptr++ != pio_mpi_rank_g) {
if (++nerror < 20){
/* report at most 20 errors */
HDprint_rank(output);
HDfprintf(output, "read data error, expected (%Hd), "
"got (%Hd)\n",
(long_long)pio_mpi_rank_g,
- (long_long)*(intptr-1));
- }
- }
- }
+ (long_long)*(ucharptr-1));
+ } /* end if */
+ } /* end if */
+ } /* end for */
if (nerror >= 20) {
HDprint_rank(output);
HDfprintf(output, "...");
- HDfprintf(output, "total read data errors=%Hd\n",
+ HDfprintf(output, "total read data errors=%d\n",
nerror);
- }
+ } /* end if */
} /* if (parms->verify) */
- /* Increment number of elements transferred */
- nelmts_xfer += nelmts_toxfer;
- }
+ } /* end while */
/* Stop "raw data" read timer */
set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, STOP);
@@ -1158,7 +1383,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
/* Calculate read time */
/* Close dataset. Only HDF5 needs to do an explicit close. */
- if (parms->io_type == PHDF5){
+ if (parms->io_type == PHDF5) {
hrc = H5Dclose(h5ds_id);
if (hrc < 0) {
@@ -1167,10 +1392,21 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets,
}
h5ds_id = -1;
- }
- }
+ } /* end if */
+ } /* end for */
done:
+ /* release MPI-I/O objects */
+ if (parms->io_type == MPIO) {
+ /* Free file type */
+ mrc = MPI_Type_free( &mpi_file_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
+
+ /* Free buffer type */
+ mrc = MPI_Type_free( &mpi_blk_type );
+ VRFY((mrc==MPI_SUCCESS), "MPIO_TYPE_FREE");
+ } /* end if */
+
/* release HDF5 objects */
if (h5dset_space_id != -1) {
hrc = H5Sclose(h5dset_space_id);
@@ -1713,6 +1949,8 @@ invalidate_file_cache(const char *filename)
#else
+/* turn the stubs off since some compilers are warning they are not used */
+#if 0
/* H5_HAVE_GPFS isn't defined...stub functions */
static void
@@ -1757,6 +1995,8 @@ invalidate_file_cache(const char UNUSED *filename)
return;
}
+#endif /* 0 */
+
#endif /* H5_HAVE_GPFS */
#ifdef TIME_MPI
diff --git a/perform/pio_perf.c b/perform/pio_perf.c
index 0f43f15..5ddac67 100644
--- a/perform/pio_perf.c
+++ b/perform/pio_perf.c
@@ -109,7 +109,7 @@ int pio_debug_level = 0;/* The debug level:
*/
/* local variables */
-static const char *progname = "pio_perf";
+static const char *progname = "h5perf";
/*
* Command-line options: The user can specify short or long-named
@@ -117,14 +117,11 @@ static const char *progname = "pio_perf";
* adding more, make sure that they don't clash with each other.
*/
#if 1
-static const char *s_opts = "ha:A:cCD:f:P:p:X:x:nd:F:i:Io:stT:w";
+static const char *s_opts = "a:A:B:cCd:D:e:F:hi:Ino:p:P:stT:wx:X:";
#else
-static const char *s_opts = "ha:A:bcCD:f:P:p:X:x:nd:F:i:Io:stT:w";
+static const char *s_opts = "a:A:bB:cCd:D:e:F:hi:Ino:p:P:stT:wx:X:";
#endif /* 1 */
static struct long_options l_opts[] = {
- { "help", no_arg, 'h' },
- { "hel", no_arg, 'h' },
- { "he", no_arg, 'h' },
{ "align", require_arg, 'a' },
{ "alig", require_arg, 'a' },
{ "ali", require_arg, 'a' },
@@ -139,6 +136,15 @@ static struct long_options l_opts[] = {
{ "bin", no_arg, 'b' },
{ "bi", no_arg, 'b' },
#endif /* 0 */
+ { "block-size", require_arg, 'B' },
+ { "block-siz", require_arg, 'B' },
+ { "block-si", require_arg, 'B' },
+ { "block-s", require_arg, 'B' },
+ { "block-", require_arg, 'B' },
+ { "block", require_arg, 'B' },
+ { "bloc", require_arg, 'B' },
+ { "blo", require_arg, 'B' },
+ { "bl", require_arg, 'B' },
{ "chunk", no_arg, 'c' },
{ "chun", no_arg, 'c' },
{ "chu", no_arg, 'c' },
@@ -156,13 +162,9 @@ static struct long_options l_opts[] = {
{ "debu", require_arg, 'D' },
{ "deb", require_arg, 'D' },
{ "de", require_arg, 'D' },
- { "file-size", require_arg, 'f' },
- { "file-siz", require_arg, 'f' },
- { "file-si", require_arg, 'f' },
- { "file-s", require_arg, 'f' },
- { "file", require_arg, 'f' },
- { "fil", require_arg, 'f' },
- { "fi", require_arg, 'f' },
+ { "help", no_arg, 'h' },
+ { "hel", no_arg, 'h' },
+ { "he", no_arg, 'h' },
{ "interleaved", require_arg, 'I' },
{ "interleave", require_arg, 'I' },
{ "interleav", require_arg, 'I' },
@@ -213,6 +215,11 @@ static struct long_options l_opts[] = {
{ "no-f", no_arg, 'n' },
{ "no-", no_arg, 'n' },
{ "no", no_arg, 'n' },
+ { "num-bytes", require_arg, 'e' },
+ { "num-byte", require_arg, 'e' },
+ { "num-byt", require_arg, 'e' },
+ { "num-by", require_arg, 'e' },
+ { "num-b", require_arg, 'e' },
{ "num-dsets", require_arg, 'd' },
{ "num-dset", require_arg, 'd' },
{ "num-dse", require_arg, 'd' },
@@ -260,14 +267,15 @@ static struct long_options l_opts[] = {
struct options {
long io_types; /* bitmask of which I/O types to test */
const char *output_file; /* file to print report to */
- off_t file_size; /* size of file */
long num_dsets; /* number of datasets */
long num_files; /* number of files */
+ size_t num_bpp; /* number of bytes per proc per dset */
int num_iters; /* number of iterations */
int max_num_procs; /* maximum number of processes to use */
int min_num_procs; /* minimum number of processes to use */
size_t max_xfer_size; /* maximum transfer buffer size */
size_t min_xfer_size; /* minimum transfer buffer size */
+ size_t blk_size; /* Block size */
unsigned interleaved; /* Interleaved vs. contiguous blocks */
unsigned collective; /* Collective vs. independent I/O */
int print_times; /* print times as well as throughputs */
@@ -410,6 +418,7 @@ run_test_loop(struct options *opts)
parms.num_files = opts->num_files;
parms.num_dsets = opts->num_dsets;
parms.num_iters = opts->num_iters;
+ parms.blk_size = opts->blk_size;
parms.interleaved = opts->interleaved;
parms.collective = opts->collective;
parms.h5_align = opts->h5_alignment;
@@ -439,17 +448,16 @@ run_test_loop(struct options *opts)
for (buf_size = opts->min_xfer_size;
buf_size <= opts->max_xfer_size; buf_size <<= 1) {
parms.buf_size = buf_size;
- parms.num_elmts = opts->file_size /
- (off_t)(parms.num_dsets * sizeof(int));
+ parms.num_bytes = (off_t)opts->num_bpp*parms.num_procs;
print_indent(1);
output_report("Transfer Buffer Size: %ld bytes, File size: %.2f MBs\n",
buf_size,
- ((double)parms.num_dsets * (double)parms.num_elmts *
- (double)sizeof(int)) / ONE_MB);
+ ((double)parms.num_dsets * (double)parms.num_bytes)
+ / ONE_MB);
print_indent(1);
- output_report(" # of files: %ld, # of dsets: %ld, # of elmts per dset: %ld\n",
- parms.num_files, parms.num_dsets, parms.num_elmts);
+ output_report(" # of files: %ld, # of datasets: %ld, dataset size: %.2f MBs\n",
+ parms.num_files, parms.num_dsets, (double)parms.num_bytes/ONE_MB);
if (opts->io_types & PIO_POSIX)
run_test(POSIXIO, parms, opts);
@@ -503,7 +511,7 @@ run_test(iotype iot, parameters parms, struct options *opts)
minmax read_gross_mm = {0.0, 0.0, 0.0, 0};
minmax read_raw_mm = {0.0, 0.0, 0.0, 0};
- raw_size = (off_t)parms.num_dsets * (off_t)parms.num_elmts * (off_t)sizeof(int);
+ raw_size = (off_t)parms.num_dsets * (off_t)parms.num_bytes;
parms.io_type = iot;
print_indent(2);
output_report("IO API = ");
@@ -996,8 +1004,8 @@ report_parameters(struct options *opts)
HDfprintf(output, "rank %d: IO API=", rank);
print_io_api(opts->io_types);
- HDfprintf(output, "rank %d: File size=", rank);
- recover_size_and_print((long_long)opts->file_size, "\n");
+ HDfprintf(output, "rank %d: Number of bytes per process per dataset=", rank);
+ recover_size_and_print((long_long)opts->num_bpp, "\n");
HDfprintf(output, "rank %d: Number of files=%Hd\n", rank,
(long_long)opts->num_files);
@@ -1008,9 +1016,21 @@ report_parameters(struct options *opts)
HDfprintf(output, "rank %d: Number of processes=%d:%d\n", rank,
opts->min_num_procs, opts->max_num_procs);
+ HDfprintf(output, "rank %d: Size of dataset(s)=", rank);
+ recover_size_and_print((long_long)(opts->num_bpp * opts->min_num_procs), ":");
+ recover_size_and_print((long_long)(opts->num_bpp * opts->max_num_procs), "\n");
+
+ HDfprintf(output, "rank %d: File size=", rank);
+ recover_size_and_print((long_long)(opts->num_bpp * opts->min_num_procs
+ * opts->num_dsets), ":");
+ recover_size_and_print((long_long)(opts->num_bpp * opts->max_num_procs
+ * opts->num_dsets), "\n");
+
HDfprintf(output, "rank %d: Transfer buffer size=", rank);
recover_size_and_print((long_long)opts->min_xfer_size, ":");
recover_size_and_print((long_long)opts->max_xfer_size, "\n");
+ HDfprintf(output, "rank %d: Block size=", rank);
+ recover_size_and_print((long_long)opts->blk_size, "\n");
HDfprintf(output, "rank %d: Block Pattern in Dataset=", rank);
if(opts->interleaved)
@@ -1055,15 +1075,16 @@ parse_command_line(int argc, char *argv[])
cl_opts = (struct options *)malloc(sizeof(struct options));
cl_opts->output_file = NULL;
- cl_opts->file_size = 64 * ONE_MB;
cl_opts->io_types = 0; /* will set default after parsing options */
cl_opts->num_dsets = 1;
cl_opts->num_files = 1;
+ cl_opts->num_bpp = 256 * ONE_KB;
cl_opts->num_iters = 1;
cl_opts->max_num_procs = comm_world_nprocs_g;
cl_opts->min_num_procs = 1;
cl_opts->max_xfer_size = 1 * ONE_MB;
cl_opts->min_xfer_size = 128 * ONE_KB;
+ cl_opts->blk_size = 128 * ONE_KB; /* Default to writing 128K per block */
cl_opts->interleaved = 0; /* Default to contiguous blocks in dataset */
cl_opts->collective = 0; /* Default to independent I/O access */
cl_opts->print_times = FALSE; /* Printing times is off by default */
@@ -1119,6 +1140,9 @@ parse_command_line(int argc, char *argv[])
/* the future "binary" option */
break;
#endif /* 0 */
+ case 'B':
+ cl_opts->blk_size = parse_size_directive(opt_arg);
+ break;
case 'c':
/* Turn on chunked HDF5 dataset creation */
cl_opts->h5_use_chunks = TRUE;
@@ -1187,8 +1211,8 @@ parse_command_line(int argc, char *argv[])
}
break;
- case 'f':
- cl_opts->file_size = parse_size_directive(opt_arg);
+ case 'e':
+ cl_opts->num_bpp = parse_size_directive(opt_arg);
break;
case 'F':
cl_opts->num_files = atoi(opt_arg);
@@ -1318,15 +1342,20 @@ usage(const char *prog)
#if 0
printf(" -b, --binary The elusive binary option\n");
#endif /* 0 */
+ printf(" -B S, --block-size=S Block size within transfer buffer\n");
+ printf(" (see below for description)\n");
+ printf(" [default:128K]\n");
printf(" -c, --chunk Create HDF5 datasets chunked [default: off]\n");
- printf(" -C, --collective Use collective I/O for MPI and HDF5 APIs [default: off (i.e. independent I/O)]\n");
+ printf(" -C, --collective Use collective I/O for MPI and HDF5 APIs\n");
+ printf(" [default: off (i.e. independent I/O)]\n");
printf(" -d N, --num-dsets=N Number of datasets per file [default:1]\n");
printf(" -D DL, --debug=DL Indicate the debugging level\n");
printf(" [default: no debugging]\n");
- printf(" -f S, --file-size=S Size of a single file [default: 64M]\n");
+ printf(" -e S, --num-bytes=S Number of bytes per process per dataset\n");
+ printf(" [default: 256K]\n");
printf(" -F N, --num-files=N Number of files [default: 1]\n");
printf(" -i, --num-iterations Number of iterations to perform [default: 1]\n");
- printf(" -I --interleaved Interleaved block I/O (see below for example)\n");
+ printf(" -I, --interleaved Interleaved block I/O (see below for example)\n");
printf(" [default: Contiguous block I/O]\n");
printf(" -n, --no-fill Don't write fill values to HDF5 dataset\n");
printf(" (Supported in HDF5 library v1.5 only)\n");
@@ -1348,7 +1377,7 @@ usage(const char *prog)
printf(" M - Megabyte (%d)\n", ONE_MB);
printf(" G - Gigabyte (%d)\n", ONE_GB);
printf("\n");
- printf(" Example: 37M = 37 Megabytes = %d bytes\n", 37*ONE_MB);
+ printf(" Example: '37M' is 37 megabytes or %d bytes\n", 37*ONE_MB);
printf("\n");
printf(" AL - is an API list. Valid values are:\n");
printf(" phdf5 - Parallel HDF5\n");
@@ -1357,24 +1386,50 @@ usage(const char *prog)
printf("\n");
printf(" Example: --api=mpiio,phdf5\n");
printf("\n");
+ printf(" Block size vs. Transfer buffer size:\n");
+ printf(" The transfer buffer size is the size of a buffer in memory, which is\n");
+ printf(" broken into 'block size' pieces and written to the file. The pattern\n");
+ printf(" of the blocks in the file is described below in the 'Interleaved vs.\n");
+ printf(" Contiguous blocks' example.\n");
+ printf("\n");
+ printf(" If the collective I/O option is given, the blocks in each transfer buffer\n");
+ printf(" are written at once with an MPI derived type, for the MPI-I/O and PHDF5\n");
+ printf(" APIs.\n");
+ printf("\n");
printf(" Interleaved vs. Contiguous blocks:\n");
- printf(" For example, with a 4 process run,\n");
- printf(" Contiguous blocks are written to the file like so:\n");
- printf(" 1111222233334444\n");
- printf(" Interleaved blocks are written to the file like so:\n");
- printf(" 1234123412341234\n");
+ printf(" When contiguous blocks are written to a dataset, the dataset is divided\n");
+ printf(" into '# processes' regions and each process writes data to its own region.\n");
+ printf(" When interleaved blocks are written to a dataset, space for the first\n");
+ printf(" block of the first process is allocated in the dataset, then space is\n");
+ printf(" allocated for the first block of the second process, etc. until space is\n");
+ printf(" allocated for the first block of each process, then space is allocated for\n");
+ printf(" the second block of the first process, the second block of the second\n");
+ printf(" process, etc.\n");
+ printf("\n");
+ printf(" For example, with a 4 process run, 1MB bytes-per-process, 256KB transfer\n");
+ printf(" buffer size, and 64KB block size,\n");
+ printf(" 16 contiguous blocks per process are written to the file like so:\n");
+ printf(" 1111111111111111222222222222222233333333333333334444444444444444\n");
+ printf(" 16 interleaved blocks per process are written to the file like so:\n");
+ printf(" 1234123412341234123412341234123412341234123412341234123412341234\n");
+ printf(" If collective I/O is turned on, all of the four blocks per transfer\n");
+ printf(" buffer will be written in one collective I/O call.\n");
printf("\n");
printf(" DL - is a list of debugging flags. Valid values are:\n");
printf(" 1 - Minimal\n");
printf(" 2 - Not quite everything\n");
printf(" 3 - Everything\n");
- printf(" 4 - Everything and the kitchen sink\n");
+ printf(" 4 - The kitchen sink\n");
printf(" r - Raw data I/O throughput information\n");
printf(" t - Times as well as throughputs\n");
printf(" v - Verify data correctness\n");
printf("\n");
printf(" Example: --debug=2,r,t\n");
printf("\n");
+ printf(" Environment variables:\n");
+ printf(" HDF5_NOCLEANUP Do not remove data files if set [default remove]\n");
+ printf(" HDF5_MPI_INFO MPI INFO object key=value separated by ;\n");
+ printf(" HDF5_PARAPREFIX Parallel data files prefix\n");
fflush(stdout);
}
}
diff --git a/perform/pio_perf.h b/perform/pio_perf.h
index e245e8a..5053eb2 100644
--- a/perform/pio_perf.h
+++ b/perform/pio_perf.h
@@ -36,9 +36,10 @@ typedef struct parameters_ {
int num_procs; /* Maximum number of processes to use */
long num_files; /* Number of files to create */
long num_dsets; /* Number of datasets to create */
- off_t num_elmts; /* Number of native ints in each dset */
+ off_t num_bytes; /* Number of bytes in each dset */
int num_iters; /* Number of times to loop doing the IO */
size_t buf_size; /* Buffer size */
+ size_t blk_size; /* Block size */
unsigned interleaved; /* Interleaved vs. contiguous blocks */
unsigned collective; /* Collective vs. independent I/O */
hsize_t h5_align; /* HDF5 object alignment */