diff options
-rw-r--r-- | perform/pio_engine.c | 305 | ||||
-rw-r--r-- | perform/pio_perf.c | 548 | ||||
-rw-r--r-- | perform/pio_perf.h | 8 | ||||
-rw-r--r-- | perform/pio_timer.c | 93 | ||||
-rw-r--r-- | perform/pio_timer.h | 18 |
5 files changed, 607 insertions, 365 deletions
diff --git a/perform/pio_engine.c b/perform/pio_engine.c index 3c6adf0..54478a8 100644 --- a/perform/pio_engine.c +++ b/perform/pio_engine.c @@ -55,13 +55,13 @@ } while(0) -/* Raw I/O macros */ -#define RAWCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600) -#define RAWOPEN(fn, F) HDopen(fn, F, 0600) -#define RAWCLOSE(F) HDclose(F) -#define RAWSEEK(F,L) HDlseek(F, L, SEEK_SET) -#define RAWWRITE(F,B,S) HDwrite(F,B,S) -#define RAWREAD(F,B,S) HDread(F,B,S) +/* POSIX I/O macros */ +#define POSIXCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600) +#define POSIXOPEN(fn, F) HDopen(fn, F, 0600) +#define POSIXCLOSE(F) HDclose(F) +#define POSIXSEEK(F,L) HDlseek(F, L, SEEK_SET) +#define POSIXWRITE(F,B,S) HDwrite(F,B,S) +#define POSIXREAD(F,B,S) HDread(F,B,S) enum { PIO_CREATE = 1, @@ -97,7 +97,7 @@ static int clean_file_g = -1; /*whether to cleanup temporary test */ /* the different types of file descriptors we can expect */ typedef union _file_descr { - int rawfd; /* raw/Unix file */ + int posixfd; /* POSIX file handle*/ MPI_File mpifd; /* MPI file */ hid_t h5fd; /* HDF5 file */ } file_descr; @@ -105,11 +105,11 @@ typedef union _file_descr { /* local functions */ static char *pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size); -static herr_t do_write(file_descr *fd, iotype iot, long ndsets, - long nelmts, size_t buf_size, void *buffer); -static herr_t do_read(file_descr *fd, iotype iot, long ndsets, - long nelmts, size_t buf_size, void *buffer /*out*/); -static herr_t do_fopen(iotype iot, char *fname, file_descr *fd /*out*/, +static herr_t do_write(results *res, file_descr *fd, parameters *parms, + long ndsets, off_t nelmts, size_t buf_size, void *buffer); +static herr_t do_read(results *res, file_descr *fd, parameters *parms, + long ndsets, off_t nelmts, size_t buf_size, void *buffer /*out*/); +static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); static herr_t do_fclose(iotype iot, file_descr *fd); 
static void do_cleanupfile(iotype iot, char *fname); @@ -135,7 +135,7 @@ do_pio(parameters param) int maxprocs; int nfiles, nf; long ndsets; - long nelmts; + off_t nelmts; char *buffer = NULL; /*data buffer pointer */ size_t buf_size; /*data buffer size in bytes */ @@ -157,8 +157,8 @@ do_pio(parameters param) fd.mpifd = MPI_FILE_NULL; res.timers = pio_time_new(MPI_TIMER); break; - case RAWIO: - fd.rawfd = -1; + case POSIXIO: + fd.posixfd = -1; res.timers = pio_time_new(MPI_TIMER); break; case PHDF5: @@ -191,13 +191,6 @@ do_pio(parameters param) GOTOERROR(FAIL); } - if (nelmts <= 0 ) { - fprintf(stderr, - "number of elements per dataset must be > 0 (%ld)\n", - nelmts); - GOTOERROR(FAIL); - } - if (maxprocs <= 0 ) { fprintf(stderr, "maximum number of process to use must be > 0 (%d)\n", @@ -205,11 +198,6 @@ do_pio(parameters param) GOTOERROR(FAIL); } - if (buf_size <= 0 ){ - fprintf(stderr, - "buffer size must be > 0 (%ld)\n", buf_size); - GOTOERROR(FAIL); - } #if akcdebug /* debug*/ @@ -227,12 +215,14 @@ buf_size=MIN(1024*1024, buf_size); #endif /* allocate data buffer */ - buffer = malloc(buf_size); + if(buf_size>0) { + buffer = malloc(buf_size); - if (buffer == NULL){ - fprintf(stderr, "malloc for data buffer size (%ld) failed\n", - buf_size); - GOTOERROR(FAIL); + if (buffer == NULL){ + fprintf(stderr, "malloc for data buffer size (%ld) failed\n", + buf_size); + GOTOERROR(FAIL); + } } if (pio_debug_level >= 4) { @@ -261,12 +251,12 @@ fprintf(stderr, "filename=%s\n", fname); #endif set_time(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, START); - hrc = do_fopen(iot, fname, &fd, PIO_CREATE | PIO_WRITE); + hrc = do_fopen(&param, fname, &fd, PIO_CREATE | PIO_WRITE); VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, START); - hrc = do_write(&fd, iot, ndsets, nelmts, buf_size, buffer); + hrc = do_write(&res, &fd, &param, ndsets, nelmts, buf_size, buffer); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_write 
failed"); @@ -284,12 +274,12 @@ fprintf(stderr, "filename=%s\n", fname); */ /* Open file for read */ set_time(res.timers, HDF5_GROSS_READ_FIXED_DIMS, START); - hrc = do_fopen(iot, fname, &fd, PIO_READ); + hrc = do_fopen(&param, fname, &fd, PIO_READ); VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, START); - hrc = do_read(&fd, iot, ndsets, nelmts, buf_size, buffer); + hrc = do_read(&res, &fd, &param, ndsets, nelmts, buf_size, buffer); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_read failed"); @@ -311,8 +301,8 @@ done: /* close any opened files */ /* no remove(fname) because that should have happened normally. */ switch (iot) { - case RAWIO: - if (fd.rawfd != -1) + case POSIXIO: + if (fd.posixfd != -1) hrc = do_fclose(iot, &fd); break; case MPIO: @@ -326,7 +316,8 @@ done: } /* release generic resources */ - free(buffer); + if(buffer) + free(buffer); res.ret_code = ret_code; return res; } @@ -354,8 +345,8 @@ pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t si memset(fullname, 0, size); switch (iot) { - case RAWIO: - suffix = ".raw"; + case POSIXIO: + suffix = ".posix"; break; case MPIO: suffix = ".mpio"; @@ -450,23 +441,25 @@ pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t si * Modifications: */ static herr_t -do_write(file_descr *fd, iotype iot, long ndsets, - long nelmts, size_t buf_size, void *buffer) +do_write(results *res, file_descr *fd, parameters *parms, long ndsets, + off_t nelmts, size_t buf_size, void *buffer) { int ret_code = SUCCESS; - int rc; /*routine return code */ + long rc; /*routine return code */ int mrc; /*MPI return code */ MPI_Offset mpi_offset; MPI_Status mpi_status; long ndset; - long nelmts_towrite, nelmts_written; + off_t nelmts_written; + size_t nelmts_towrite; char dname[64]; off_t dset_offset; /*dataset offset in a file */ off_t file_offset; /*file offset of the next transfer */ off_t dset_size; /*one dataset 
size in bytes */ - long nelmts_in_buf; - long elmts_begin; /*first elmt this process transfer */ - long elmts_count; /*number of elmts this process transfer */ + size_t nelmts_in_buf; + off_t elmts_begin; /*first elmt this process transfer */ + off_t elmts_count; /*number of elmts this process transfer */ + hid_t dcpl = -1; /* Dataset creation property list */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -487,16 +480,28 @@ fprintf(stderr, "buffer size=%ld\n", buf_size); nelmts_in_buf = buf_size/ELMT_SIZE; /* hdf5 data space setup */ - if (iot == PHDF5){ - /* define a contiquous dataset of nelmts native ints */ - h5dims[0] = nelmts; - h5dset_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + if (parms->io_type == PHDF5){ + if(nelmts>0) { + /* define a contiquous dataset of nelmts native ints */ + h5dims[0] = nelmts; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ /* create the memory dataspace that corresponds to the xfer buffer */ - h5dims[0] = nelmts_in_buf; - h5mem_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + if(nelmts_in_buf>0) { + h5dims[0] = nelmts_in_buf; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ } for (ndset = 1; ndset <= ndsets; ++ndset) { @@ -504,23 +509,58 @@ fprintf(stderr, "buffer size=%ld\n", buf_size); /* Calculate dataset offset within a file */ /* create dataset */ - switch (iot) { - case RAWIO: + switch (parms->io_type) { + case POSIXIO: case MPIO: - /* both raw and mpi io just need dataset offset in file*/ + /* both posix and mpi io just need dataset 
offset in file*/ dset_offset = (ndset - 1) * dset_size; break; case PHDF5: + dcpl = H5Pcreate(H5P_DATASET_CREATE); + if (dcpl < 0) { + fprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Make the dataset chunked if asked */ + if(parms->h5_use_chunks) { + /* Set the chunk size to be the same as the buffer size */ + h5dims[0] = nelmts_in_buf; + hrc = H5Pset_chunk(dcpl, 1, h5dims); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + +#if H5_VERS_MAJOR > 1 || H5_VERS_MINOR > 4 + /* Disable writing fill values if asked */ + if(parms->h5_no_fill) { + hrc = H5Pset_fill_time(dcpl, H5D_FILL_TIME_NEVER); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ +#endif + sprintf(dname, "Dataset_%ld", ndset); h5ds_id = H5Dcreate(fd->h5fd, dname, H5T_NATIVE_INT, - h5dset_space_id, H5P_DEFAULT); + h5dset_space_id, dcpl); if (h5ds_id < 0) { fprintf(stderr, "HDF5 Dataset Create failed\n"); GOTOERROR(FAIL); } + hrc = H5Pclose(dcpl); + /* verifying the close of the dcpl */ + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + break; } @@ -529,11 +569,11 @@ fprintf(stderr, "buffer size=%ld\n", buf_size); * and the next process. Count of elements is the difference between * these two beginnings. This way, it avoids any rounding errors. */ - elmts_begin = (nelmts*1.0)/pio_mpi_nprocs_g*pio_mpi_rank_g; + elmts_begin = (long)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g); if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1)) - elmts_count = ((nelmts * 1.0) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) - - elmts_begin; + elmts_count = (long)((((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) + - (double)elmts_begin); else /* last process. 
Take whatever are left */ elmts_count = nelmts - elmts_begin; @@ -545,10 +585,13 @@ fprintf(stderr, "proc %d: elmts_begin=%ld, elmts_count=%ld\n", nelmts_written = 0 ; + /* Start "raw data" write timer */ + set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, START); + while (nelmts_written < elmts_count){ nelmts_towrite = elmts_count - nelmts_written; - if (elmts_count - nelmts_written >= nelmts_in_buf) { + if ((elmts_count - nelmts_written) >= nelmts_in_buf) { nelmts_towrite = nelmts_in_buf; } else { /* last write of a partial buffer */ @@ -568,8 +611,8 @@ fprintf(stderr, "proc %d: elmts_begin=%ld, elmts_count=%ld\n", /* Write */ /* Calculate offset of write within a dataset/file */ - switch (iot) { - case RAWIO: + switch (parms->io_type) { + case POSIXIO: /* need to (off_t) the elmnts_begin expression because they */ /* may be of smaller sized integer types */ file_offset = dset_offset + (off_t)(elmts_begin + nelmts_written)*ELMT_SIZE; @@ -579,10 +622,10 @@ fprintf(stderr, "proc %d: writes %ld bytes at file-offset %ld\n", pio_mpi_rank_g, nelmts_towrite*ELMT_SIZE, file_offset); #endif - rc = RAWSEEK(fd->rawfd, file_offset); - VRFY((rc>=0), "RAWSEEK"); - rc = RAWWRITE(fd->rawfd, buffer, (size_t)(nelmts_towrite * ELMT_SIZE)); - VRFY((rc == (nelmts_towrite*ELMT_SIZE)), "RAWWRITE"); + rc = POSIXSEEK(fd->posixfd, file_offset); + VRFY((rc>=0), "POSIXSEEK"); + rc = POSIXWRITE(fd->posixfd, buffer, (size_t)(nelmts_towrite * ELMT_SIZE)); + VRFY((rc == (nelmts_towrite*ELMT_SIZE)), "POSIXWRITE"); break; case MPIO: @@ -594,7 +637,7 @@ fprintf(stderr, "proc %d: writes %ld bytes at mpi-offset %ld\n", #endif mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, - nelmts_towrite * ELMT_SIZE, MPI_CHAR, + (int)(nelmts_towrite*ELMT_SIZE), MPI_CHAR, &mpi_status); VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); break; @@ -628,10 +671,13 @@ fprintf(stderr, "proc %d: writes %ld bytes at mpi-offset %ld\n", nelmts_written += nelmts_towrite; } + /* Stop "raw data" write timer */ + 
set_time(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, STOP); + /* Calculate write time */ /* Close dataset. Only HDF5 needs to do an explicit close. */ - if (iot == PHDF5){ + if (parms->io_type == PHDF5){ hrc = H5Dclose(h5ds_id); if (hrc < 0) { @@ -676,23 +722,24 @@ done: * Modifications: */ static herr_t -do_read(file_descr *fd, iotype iot, long ndsets, - long nelmts, size_t buf_size, void *buffer /*out*/) +do_read(results *res, file_descr *fd, parameters *parms, long ndsets, + off_t nelmts, size_t buf_size, void *buffer /*out*/) { int ret_code = SUCCESS; - int rc; /*routine return code */ + long rc; /*routine return code */ int mrc; /*MPI return code */ MPI_Offset mpi_offset; MPI_Status mpi_status; long ndset; - long nelmts_toread, nelmts_read; + size_t nelmts_toread; + off_t nelmts_read; char dname[64]; off_t dset_offset; /*dataset offset in a file */ off_t file_offset; /*file offset of the next transfer */ off_t dset_size; /*one dataset size in bytes */ - long nelmts_in_buf; - long elmts_begin; /*first elmt this process transfer */ - long elmts_count; /*number of elmts this process transfer */ + size_t nelmts_in_buf; + off_t elmts_begin; /*first elmt this process transfer */ + off_t elmts_count; /*number of elmts this process transfer */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -713,26 +760,38 @@ fprintf(stderr, "buffer size=%ld\n", buf_size); nelmts_in_buf = buf_size/ELMT_SIZE; /* hdf5 data space setup */ - if (iot == PHDF5){ - /* define a contiquous dataset of nelmts native ints */ - h5dims[0] = nelmts; - h5dset_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + if (parms->io_type == PHDF5){ + if(nelmts>0) { + /* define a contiquous dataset of nelmts native ints */ + h5dims[0] = nelmts; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), 
"H5Screate"); + } /* end else */ /* create the memory dataspace that corresponds to the xfer buffer */ - h5dims[0] = nelmts_in_buf; - h5mem_space_id = H5Screate_simple(1, h5dims, NULL); - VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + if(nelmts_in_buf>0) { + h5dims[0] = nelmts_in_buf; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ } for (ndset = 1; ndset <= ndsets; ++ndset) { /* Calculate dataset offset within a file */ /* create dataset */ - switch (iot) { - case RAWIO: + switch (parms->io_type) { + case POSIXIO: case MPIO: - /* both raw and mpi io just need dataset offset in file*/ + /* both posix and mpi io just need dataset offset in file*/ dset_offset = (ndset - 1) * dset_size; break; @@ -753,11 +812,11 @@ fprintf(stderr, "buffer size=%ld\n", buf_size); * and the next process. Count of elements is the difference between * these two beginnings. This way, it avoids any rounding errors. */ - elmts_begin = (nelmts*1.0)/pio_mpi_nprocs_g*pio_mpi_rank_g; + elmts_begin = (long)(((double)nelmts)/pio_mpi_nprocs_g*pio_mpi_rank_g); if (pio_mpi_rank_g < (pio_mpi_nprocs_g - 1)) - elmts_count = ((nelmts * 1.0) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) - - elmts_begin; + elmts_count = (long)((((double)nelmts) / pio_mpi_nprocs_g * (pio_mpi_rank_g + 1)) - + (double)elmts_begin); else /* last process. 
Take whatever are left */ elmts_count = nelmts - elmts_begin; @@ -769,10 +828,13 @@ fprintf(stderr, "proc %d: elmts_begin=%ld, elmts_count=%ld\n", nelmts_read = 0 ; + /* Start "raw data" read timer */ + set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, START); + while (nelmts_read < elmts_count){ nelmts_toread = elmts_count - nelmts_read; - if (elmts_count - nelmts_read >= nelmts_in_buf) + if ((elmts_count - nelmts_read) >= nelmts_in_buf) nelmts_toread = nelmts_in_buf; else /* last read of a partial buffer */ @@ -780,8 +842,8 @@ fprintf(stderr, "proc %d: elmts_begin=%ld, elmts_count=%ld\n", /* read */ /* Calculate offset of read within a dataset/file */ - switch (iot){ - case RAWIO: + switch (parms->io_type){ + case POSIXIO: /* need to (off_t) the elmnts_begin expression because they */ /* may be of smaller sized integer types */ file_offset = dset_offset + (off_t)(elmts_begin + nelmts_read)*ELMT_SIZE; @@ -791,10 +853,10 @@ fprintf(stderr, "proc %d: read %ld bytes at file-offset %ld\n", pio_mpi_rank_g, nelmts_toread*ELMT_SIZE, file_offset); #endif - rc = RAWSEEK(fd->rawfd, file_offset); - VRFY((rc>=0), "RAWSEEK"); - rc = RAWREAD(fd->rawfd, buffer, (size_t)(nelmts_toread*ELMT_SIZE)); - VRFY((rc==(nelmts_toread*ELMT_SIZE)), "RAWREAD"); + rc = POSIXSEEK(fd->posixfd, file_offset); + VRFY((rc>=0), "POSIXSEEK"); + rc = POSIXREAD(fd->posixfd, buffer, (size_t)(nelmts_toread*ELMT_SIZE)); + VRFY((rc==(nelmts_toread*ELMT_SIZE)), "POSIXREAD"); break; case MPIO: @@ -806,7 +868,7 @@ fprintf(stderr, "proc %d: read %ld bytes at mpi-offset %ld\n", #endif mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, - nelmts_toread*ELMT_SIZE, MPI_CHAR, + (int)(nelmts_toread*ELMT_SIZE), MPI_CHAR, &mpi_status); VRFY((mrc==MPI_SUCCESS), "MPIO_read"); break; @@ -853,10 +915,13 @@ fprintf(stderr, "proc %d: read %ld bytes at mpi-offset %ld\n", nelmts_read += nelmts_toread; } + /* Stop "raw data" read timer */ + set_time(res->timers, HDF5_RAW_READ_FIXED_DIMS, STOP); + /* Calculate read time */ /* 
Close dataset. Only HDF5 needs to do an explicit close. */ - if (iot == PHDF5){ + if (parms->io_type == PHDF5){ hrc = H5Dclose(h5ds_id); if (hrc < 0) { @@ -901,26 +966,26 @@ done: * Modifications: */ static herr_t -do_fopen(iotype iot, char *fname, file_descr *fd /*out*/, int flags) +do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags) { int ret_code = SUCCESS, mrc; herr_t hrc; hid_t acc_tpl = -1; /* file access templates */ - switch (iot) { - case RAWIO: + switch (param->io_type) { + case POSIXIO: if (flags & (PIO_CREATE | PIO_WRITE)) - fd->rawfd = RAWCREATE(fname); + fd->posixfd = POSIXCREATE(fname); else - fd->rawfd = RAWOPEN(fname, O_RDONLY); + fd->posixfd = POSIXOPEN(fname, O_RDONLY); - if (fd->rawfd < 0 ) { - fprintf(stderr, "Raw File Open failed(%s)\n", fname); + if (fd->posixfd < 0 ) { + fprintf(stderr, "POSIX File Open failed(%s)\n", fname); GOTOERROR(FAIL); } - /* The perils of raw I/O in a parallel environment. The problem is: + /* The perils of POSIX I/O in a parallel environment. The problem is: * * - Process n opens a file with truncation and then starts * writing to the file. 
@@ -965,14 +1030,20 @@ do_fopen(iotype iot, char *fname, file_descr *fd /*out*/, int flags) case PHDF5: acc_tpl = H5Pcreate(H5P_FILE_ACCESS); - if (acc_tpl < 0) { fprintf(stderr, "HDF5 Property List Create failed\n"); GOTOERROR(FAIL); } + /* Set the file driver to the MPI-I/O driver */ hrc = H5Pset_fapl_mpio(acc_tpl, pio_comm_g, pio_info_g); + if (hrc < 0) { + fprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } + /* Set the alignment of objects in HDF5 file */ + hrc = H5Pset_alignment(acc_tpl, param->h5_thresh, param->h5_align); if (hrc < 0) { fprintf(stderr, "HDF5 Property List Set failed\n"); GOTOERROR(FAIL); @@ -1019,15 +1090,15 @@ do_fclose(iotype iot, file_descr *fd /*out*/) int mrc = 0, rc = 0; switch (iot) { - case RAWIO: - rc = RAWCLOSE(fd->rawfd); + case POSIXIO: + rc = POSIXCLOSE(fd->posixfd); if (rc != 0){ - fprintf(stderr, "Raw File Close failed\n"); + fprintf(stderr, "POSIX File Close failed\n"); GOTOERROR(FAIL); } - fd->rawfd = -1; + fd->posixfd = -1; break; case MPIO: @@ -1078,7 +1149,7 @@ do_cleanupfile(iotype iot, char *fname) if (clean_file_g){ switch (iot){ - case RAWIO: + case POSIXIO: remove(fname); break; case MPIO: diff --git a/perform/pio_perf.c b/perform/pio_perf.c index 0f6cec6..e44e2b8 100644 --- a/perform/pio_perf.c +++ b/perform/pio_perf.c @@ -13,7 +13,7 @@ * This is what the report should look like: * * nprocs = Max#Procs - * IO Type = RAWIO + * IO API = POSIXIO * # Files = 1, # of dsets = 1000, Elements per dset = 37000 * Write Results = x MB/s * Read Results = x MB/s @@ -23,7 +23,7 @@ * * . . . * - * IO Type = MPIO + * IO API = MPIO * # Files = 1, # of dsets = 1000, Elements per dset = 37000 * Write Results = x MB/s * Read Results = x MB/s @@ -33,7 +33,7 @@ * * . . . 
* - * IO Type = PHDF5 + * IO API = PHDF5 * # Files = 1, # of dsets = 1000, Elements per dset = 37000 * Write Results = x MB/s * Read Results = x MB/s @@ -72,11 +72,12 @@ #define ONE_MB (ONE_KB * ONE_KB) #define ONE_GB (ONE_MB * ONE_KB) -#define PIO_RAW 0x10 +#define PIO_POSIX 0x10 #define PIO_MPI 0x20 #define PIO_HDF5 0x40 -#define MB_PER_SEC(bytes,t) (((bytes) / ONE_MB) / t) +/* report 0.0 in case t is zero too */ +#define MB_PER_SEC(bytes,t) (((t)==0.0) ? 0.0 : ((((double)bytes) / ONE_MB) / (t))) /* global variables */ FILE *output; /* output file */ @@ -91,6 +92,7 @@ int pio_debug_level = 0;/* The debug level: * 1 - Minimal * 2 - Some more * 3 - Maximal + * 4 - Maximal & then some */ /* local variables */ @@ -102,22 +104,32 @@ static const char *progname = "pio_perf"; * adding more, make sure that they don't clash with each other. */ #if 1 -static const char *s_opts = "hD:f:HP:p:X:x:md:F:i:o:r"; +static const char *s_opts = "ha:A:cD:f:P:p:X:x:nd:F:i:o:stT:"; #else -static const char *s_opts = "hbD:f:HP:p:X:x:md:F:i:o:r"; +static const char *s_opts = "ha:A:bcD:f:P:p:X:x:nd:F:i:o:stT:"; #endif /* 1 */ static struct long_options l_opts[] = { { "help", no_arg, 'h' }, { "hel", no_arg, 'h' }, { "he", no_arg, 'h' }, + { "align", require_arg, 'a' }, + { "alig", require_arg, 'a' }, + { "ali", require_arg, 'a' }, + { "al", require_arg, 'a' }, + { "api", require_arg, 'A' }, + { "ap", require_arg, 'A' }, #if 0 - /* a siting of the elusive binary option */ + /* a sighting of the elusive binary option */ { "binary", no_arg, 'b' }, { "binar", no_arg, 'b' }, { "bina", no_arg, 'b' }, { "bin", no_arg, 'b' }, { "bi", no_arg, 'b' }, #endif /* 0 */ + { "chunk", no_arg, 'c' }, + { "chun", no_arg, 'c' }, + { "chu", no_arg, 'c' }, + { "ch", no_arg, 'c' }, { "debug", require_arg, 'D' }, { "debu", require_arg, 'D' }, { "deb", require_arg, 'D' }, @@ -130,9 +142,6 @@ static struct long_options l_opts[] = { { "file", require_arg, 'f' }, { "fil", require_arg, 'f' }, { "fi", require_arg, 'f' 
}, - { "hdf5", no_arg, 'H' }, - { "hdf", no_arg, 'H' }, - { "hd", no_arg, 'H' }, { "max-num-processes", require_arg, 'P' }, { "max-num-processe", require_arg, 'P' }, { "max-num-process", require_arg, 'P' }, @@ -167,10 +176,12 @@ static struct long_options l_opts[] = { { "min-xfe", require_arg, 'x' }, { "min-xf", require_arg, 'x' }, { "min-x", require_arg, 'x' }, - { "mpiio", no_arg, 'm' }, - { "mpii", no_arg, 'm' }, - { "mpi", no_arg, 'm' }, - { "mp", no_arg, 'm' }, + { "no-fill", no_arg, 'n' }, + { "no-fil", no_arg, 'n' }, + { "no-fi", no_arg, 'n' }, + { "no-f", no_arg, 'n' }, + { "no-", no_arg, 'n' }, + { "no", no_arg, 'n' }, { "num-dsets", require_arg, 'd' }, { "num-dset", require_arg, 'd' }, { "num-dse", require_arg, 'd' }, @@ -196,8 +207,14 @@ static struct long_options l_opts[] = { { "outp", require_arg, 'o' }, { "out", require_arg, 'o' }, { "ou", require_arg, 'o' }, - { "raw", no_arg, 'r' }, - { "ra", no_arg, 'r' }, + { "threshold", require_arg, 'T' }, + { "threshol", require_arg, 'T' }, + { "thresho", require_arg, 'T' }, + { "thresh", require_arg, 'T' }, + { "thres", require_arg, 'T' }, + { "thre", require_arg, 'T' }, + { "thr", require_arg, 'T' }, + { "th", require_arg, 'T' }, { NULL, 0, '\0' } }; @@ -208,10 +225,16 @@ struct options { long num_dsets; /* number of datasets */ long num_files; /* number of files */ long num_iters; /* number of iterations */ - long max_num_procs; /* maximum number of processes to use */ - long min_num_procs; /* minimum number of processes to use */ + int max_num_procs; /* maximum number of processes to use */ + int min_num_procs; /* minimum number of processes to use */ size_t max_xfer_size; /* maximum transfer buffer size */ size_t min_xfer_size; /* minimum transfer buffer size */ + int print_times; /* print times as well as throughputs */ + int print_raw; /* print raw data throughput info */ + off_t h5_alignment; /* alignment in HDF5 file */ + off_t h5_threshold; /* threshold for alignment in HDF5 file */ + int 
h5_use_chunks; /* Make HDF5 dataset chunked */ + int h5_no_fill; /* Disable HDF5 writing fill values */ }; typedef struct _minmax { @@ -225,12 +248,13 @@ typedef struct _minmax { static off_t parse_size_directive(const char *size); static struct options *parse_command_line(int argc, char *argv[]); static void run_test_loop(struct options *options); -static int run_test(iotype iot, parameters parms); +static int run_test(iotype iot, parameters parms, struct options *opts); static void output_all_info(minmax *mm, int count, int indent_level); static void get_minmax(minmax *mm, double val); -static minmax accumulate_minmax_stuff(minmax *mm, off_t raw_size, int count); +static minmax accumulate_minmax_stuff(minmax *mm, int count); static int create_comm_world(int num_procs, int *doing_pio); static int destroy_comm_world(void); +static void output_results(const struct options *options, const char *name, minmax *table, int table_size,off_t data_size); static void output_report(const char *fmt, ...); static void print_indent(register int indent); static void usage(const char *prog); @@ -315,8 +339,8 @@ finish: * processors to use. For each loop iteration, we divide that * number by 2 and rerun the test. * - * - The second slowest is what type of IO to perform. We have - * three choices: RAWIO, MPI-IO, and PHDF5. + * - The second slowest is what type of IO API to perform. We have + * three choices: POSIXIO, MPI-IO, and PHDF5. * * - Then we change the size of the buffer. 
This information is * inferred from the number of datasets to create and the number @@ -331,9 +355,9 @@ static void run_test_loop(struct options *opts) { parameters parms; - long num_procs; + int num_procs; int doing_pio; /* if this process is doing PIO */ - int io_runs = PIO_HDF5 | PIO_MPI | PIO_RAW; /* default to run all tests */ + int io_runs = PIO_HDF5 | PIO_MPI | PIO_POSIX; /* default to run all tests */ if (opts->io_types & ~0x7) { /* we want to run only a select subset of these tests */ @@ -345,13 +369,17 @@ run_test_loop(struct options *opts) if (opts->io_types & PIO_MPI) io_runs |= PIO_MPI; - if (opts->io_types & PIO_RAW) - io_runs |= PIO_RAW; + if (opts->io_types & PIO_POSIX) + io_runs |= PIO_POSIX; } parms.num_files = opts->num_files; parms.num_dsets = opts->num_dsets; parms.num_iters = opts->num_iters; + parms.h5_align = opts->h5_alignment; + parms.h5_thresh = opts->h5_threshold; + parms.h5_use_chunks = opts->h5_use_chunks; + parms.h5_no_fill = opts->h5_no_fill; /* start with max_num_procs and decrement it by half for each loop. */ /* if performance needs restart, fewer processes may be needed. 
*/ @@ -373,24 +401,28 @@ run_test_loop(struct options *opts) for (buf_size = opts->min_xfer_size; buf_size <= opts->max_xfer_size; buf_size <<= 1) { parms.buf_size = buf_size; - parms.num_elmts = opts->file_size / (parms.num_dsets * sizeof(int)); + parms.num_elmts = opts->file_size / (off_t)(parms.num_dsets * sizeof(int)); print_indent(1); output_report("Transfer Buffer Size: %ld bytes, File size: %.2f MBs\n", buf_size, - ((double)parms.num_dsets * parms.num_elmts * sizeof(int)) / ONE_MB); + ((double)parms.num_dsets * (double)parms.num_elmts * (double)sizeof(int)) / ONE_MB); print_indent(1); output_report(" # of files: %ld, # of dsets: %ld, # of elmts per dset: %ld\n", parms.num_files, parms.num_dsets, parms.num_elmts); - if (io_runs & PIO_RAW) - run_test(RAWIO, parms); + if (io_runs & PIO_POSIX) + run_test(POSIXIO, parms, opts); if (io_runs & PIO_MPI) - run_test(MPIO, parms); + run_test(MPIO, parms, opts); if (io_runs & PIO_HDF5) - run_test(PHDF5, parms); + run_test(PHDF5, parms, opts); + + /* Run the tests once if buf_size==0, but then break out */ + if(buf_size==0) + break; } if (destroy_comm_world() != SUCCESS) { @@ -408,34 +440,37 @@ run_test_loop(struct options *opts) * Modifications: */ static int -run_test(iotype iot, parameters parms) +run_test(iotype iot, parameters parms, struct options *opts) { results res; register int i, ret_value = SUCCESS; int comm_size; off_t raw_size; - minmax total_mm; - minmax *write_mpi_mm_table; - minmax *write_mm_table; - minmax *write_gross_mm_table; - minmax *read_mpi_mm_table; - minmax *read_mm_table; - minmax *read_gross_mm_table; + minmax *write_mpi_mm_table=NULL; + minmax *write_mm_table=NULL; + minmax *write_gross_mm_table=NULL; + minmax *write_raw_mm_table=NULL; + minmax *read_mpi_mm_table=NULL; + minmax *read_mm_table=NULL; + minmax *read_gross_mm_table=NULL; + minmax *read_raw_mm_table=NULL; minmax write_mpi_mm = {0.0, 0.0, 0.0, 0}; minmax write_mm = {0.0, 0.0, 0.0, 0}; minmax write_gross_mm = {0.0, 0.0, 0.0, 0}; + 
minmax write_raw_mm = {0.0, 0.0, 0.0, 0}; minmax read_mpi_mm = {0.0, 0.0, 0.0, 0}; minmax read_mm = {0.0, 0.0, 0.0, 0}; minmax read_gross_mm = {0.0, 0.0, 0.0, 0}; + minmax read_raw_mm = {0.0, 0.0, 0.0, 0}; - raw_size = parms.num_dsets * parms.num_elmts * sizeof(int); + raw_size = (off_t)parms.num_dsets * (off_t)parms.num_elmts * (off_t)sizeof(int); parms.io_type = iot; print_indent(2); - output_report("Type of IO = "); + output_report("IO API = "); switch (iot) { - case RAWIO: - output_report("Raw\n"); + case POSIXIO: + output_report("POSIX\n"); break; case MPIO: output_report("MPIO\n"); @@ -447,46 +482,18 @@ run_test(iotype iot, parameters parms) MPI_Comm_size(pio_comm_g, &comm_size); - write_mpi_mm_table = malloc(parms.num_iters * sizeof(minmax)); - write_mm_table = malloc(parms.num_iters * sizeof(minmax)); - write_gross_mm_table = malloc(parms.num_iters * sizeof(minmax)); - read_mpi_mm_table = malloc(parms.num_iters * sizeof(minmax)); - read_mm_table = malloc(parms.num_iters * sizeof(minmax)); - read_gross_mm_table = malloc(parms.num_iters * sizeof(minmax)); - - for (i = 0; i < parms.num_iters; ++i) { - write_mpi_mm_table[i].min = 0.0; - write_mpi_mm_table[i].max = 0.0; - write_mpi_mm_table[i].sum = 0.0; - write_mpi_mm_table[i].num = 0; - - write_mm_table[i].min = 0.0; - write_mm_table[i].max = 0.0; - write_mm_table[i].sum = 0.0; - write_mm_table[i].num = 0; - - write_gross_mm_table[i].min = 0.0; - write_gross_mm_table[i].max = 0.0; - write_gross_mm_table[i].sum = 0.0; - write_gross_mm_table[i].num = 0; - - read_mpi_mm_table[i].min = 0.0; - read_mpi_mm_table[i].max = 0.0; - read_mpi_mm_table[i].sum = 0.0; - read_mpi_mm_table[i].num = 0; - - read_mm_table[i].min = 0.0; - read_mm_table[i].max = 0.0; - read_mm_table[i].sum = 0.0; - read_mm_table[i].num = 0; - - read_gross_mm_table[i].min = 0.0; - read_gross_mm_table[i].max = 0.0; - read_gross_mm_table[i].sum = 0.0; - read_gross_mm_table[i].num = 0; - } - - /* Do IO iteration times, collecting statics each time */ + 
/* allocate space for tables minmax and that it is sufficient */ + /* to initialize all elements to zeros by calloc. */ + write_mpi_mm_table = calloc(parms.num_iters , sizeof(minmax)); + write_mm_table = calloc(parms.num_iters , sizeof(minmax)); + write_gross_mm_table = calloc(parms.num_iters , sizeof(minmax)); + write_raw_mm_table = calloc(parms.num_iters , sizeof(minmax)); + read_mpi_mm_table = calloc(parms.num_iters , sizeof(minmax)); + read_mm_table = calloc(parms.num_iters , sizeof(minmax)); + read_gross_mm_table = calloc(parms.num_iters , sizeof(minmax)); + read_raw_mm_table = calloc(parms.num_iters , sizeof(minmax)); + + /* Do IO iteration times, collecting statistics each time */ for (i = 0; i < parms.num_iters; ++i) { double t; @@ -511,6 +518,12 @@ run_test(iotype iot, parameters parms) write_gross_mm_table[i] = write_gross_mm; + /* gather all of the raw "write" times */ + t = get_time(res.timers, HDF5_RAW_WRITE_FIXED_DIMS); + get_minmax(&write_raw_mm, t); + + write_raw_mm_table[i] = write_raw_mm; + /* gather all of the "mpi read" times */ t = get_time(res.timers, HDF5_MPI_READ); get_minmax(&read_mpi_mm, t); @@ -528,12 +541,32 @@ run_test(iotype iot, parameters parms) get_minmax(&read_gross_mm, t); read_gross_mm_table[i] = read_gross_mm; + + /* gather all of the raw "read" times */ + t = get_time(res.timers, HDF5_RAW_READ_FIXED_DIMS); + get_minmax(&read_raw_mm, t); + + read_raw_mm_table[i] = read_raw_mm; pio_time_destroy(res.timers); } /* - * Show various statics + * Show various statistics */ + /* Write statistics */ + /* Print the raw data throughput if desired */ + if(opts->print_raw) { + /* accumulate and output the max, min, and average "raw write" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Write details:\n"); + output_all_info(write_raw_mm_table, parms.num_iters, 4); + } + + output_results(opts,"Raw Data Write",write_raw_mm_table,parms.num_iters,raw_size); + } 
/* end if */ + /* show mpi write statics */ if (pio_debug_level >= 3) { /* output all of the times for all iterations */ @@ -542,6 +575,8 @@ run_test(iotype iot, parameters parms) output_all_info(write_mpi_mm_table, parms.num_iters, 4); } + /* We don't currently output the MPI write results */ + /* accumulate and output the max, min, and average "write" times */ if (pio_debug_level >= 3) { /* output all of the times for all iterations */ @@ -550,18 +585,7 @@ run_test(iotype iot, parameters parms) output_all_info(write_mm_table, parms.num_iters, 4); } - total_mm = accumulate_minmax_stuff(write_mm_table, raw_size, parms.num_iters); - - print_indent(3); - output_report("Write (%d iteration(s)):\n", parms.num_iters); - - print_indent(4); - output_report("Minimum Throughput: %.2f MB/s\n", total_mm.min); - print_indent(4); - output_report("Maximum Throughput: %.2f MB/s\n", total_mm.max); - print_indent(4); - output_report("Average Throughput: %.2f MB/s\n", - total_mm.sum / total_mm.num); + output_results(opts,"Write",write_mm_table,parms.num_iters,raw_size); /* accumulate and output the max, min, and average "gross write" times */ if (pio_debug_level >= 3) { @@ -571,18 +595,21 @@ run_test(iotype iot, parameters parms) output_all_info(write_gross_mm_table, parms.num_iters, 4); } - total_mm = accumulate_minmax_stuff(write_gross_mm_table, raw_size, parms.num_iters); - - print_indent(3); - output_report("Write Open-Close (%d iteration(s)):\n", parms.num_iters); + output_results(opts,"Write Open-Close",write_gross_mm_table,parms.num_iters,raw_size); + + /* Read statistics */ + /* Print the raw data throughput if desired */ + if(opts->print_raw) { + /* accumulate and output the max, min, and average "raw read" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Read details:\n"); + output_all_info(read_raw_mm_table, parms.num_iters, 4); + } - print_indent(4); - output_report("Minimum Throughput: 
%.2f MB/s\n", total_mm.min); - print_indent(4); - output_report("Maximum Throughput: %.2f MB/s\n", total_mm.max); - print_indent(4); - output_report("Average Throughput: %.2f MB/s\n", - total_mm.sum / total_mm.num); + output_results(opts,"Raw Data Read",read_raw_mm_table,parms.num_iters,raw_size); + } /* end if */ /* show mpi read statics */ if (pio_debug_level >= 3) { @@ -592,6 +619,8 @@ run_test(iotype iot, parameters parms) output_all_info(read_mpi_mm_table, parms.num_iters, 4); } + /* We don't currently output the MPI read results */ + /* accumulate and output the max, min, and average "read" times */ if (pio_debug_level >= 3) { /* output all of the times for all iterations */ @@ -600,18 +629,7 @@ run_test(iotype iot, parameters parms) output_all_info(read_mm_table, parms.num_iters, 4); } - total_mm = accumulate_minmax_stuff(read_mm_table, raw_size, parms.num_iters); - - print_indent(3); - output_report("Read (%d iteration(s)):\n", parms.num_iters); - - print_indent(4); - output_report("Minimum Throughput: %.2f MB/s\n", total_mm.min); - print_indent(4); - output_report("Maximum Throughput: %.2f MB/s\n", total_mm.max); - print_indent(4); - output_report("Average Throughput: %.2f MB/s\n", - total_mm.sum / total_mm.num); + output_results(opts,"Read",read_mm_table,parms.num_iters,raw_size); /* accumulate and output the max, min, and average "gross read" times */ if (pio_debug_level >= 3) { @@ -621,18 +639,7 @@ run_test(iotype iot, parameters parms) output_all_info(read_gross_mm_table, parms.num_iters, 4); } - total_mm = accumulate_minmax_stuff(read_gross_mm_table, raw_size, parms.num_iters); - - print_indent(3); - output_report("Read Open-Close (%d iteration(s)):\n", parms.num_iters); - - print_indent(4); - output_report("Minimum Throughput: %.2f MB/s\n", total_mm.min); - print_indent(4); - output_report("Maximum Throughput: %.2f MB/s\n", total_mm.max); - print_indent(4); - output_report("Average Throughput: %.2f MB/s\n", - total_mm.sum / total_mm.num); + 
output_results(opts,"Read Open-Close",read_gross_mm_table,parms.num_iters,raw_size); /* clean up our mess */ free(write_mpi_mm_table); @@ -641,6 +648,8 @@ run_test(iotype iot, parameters parms) free(read_mm_table); free(write_gross_mm_table); free(read_gross_mm_table); + free(write_raw_mm_table); + free(read_raw_mm_table); return ret_value; } @@ -667,7 +676,7 @@ output_all_info(minmax *mm, int count, int indent_level) } /* - * Function: get_minmax_stuff + * Function: get_minmax * Purpose: Gather all the min, max and total of val. * Return: Nothing * Programmer: Bill Wendling, 21. December 2001 @@ -694,18 +703,21 @@ get_minmax(minmax *mm, double val) * Return: TOTAL_MM - the total of all of these. * Programmer: Bill Wendling, 21. December 2001 * Modifications: + * Changed to use seconds instead of MB/s - QAK, 5/9/02 */ static minmax -accumulate_minmax_stuff(minmax *mm, off_t raw_size, int count) +accumulate_minmax_stuff(minmax *mm, int count) { register int i; minmax total_mm; - total_mm.sum = total_mm.max = total_mm.min = MB_PER_SEC(raw_size, mm[0].max); + total_mm.sum = 0.0; + total_mm.max = -DBL_MAX; + total_mm.min = DBL_MAX; total_mm.num = count; - for (i = 1; i < count; ++i) { - double m = MB_PER_SEC(raw_size, mm[i].max); + for (i = 0; i < count; ++i) { + double m = mm[i].max; total_mm.sum += m; @@ -801,6 +813,50 @@ destroy_comm_world(void) } /* + * Function: output_results + * Purpose: Print information about the time & bandwidth for a given + * minmax & # of iterations. + * Return: Nothing + * Programmer: Quincey Koziol, 9. 
May 2002 + * Modifications: + */ +static void +output_results(const struct options *opts, const char *name, minmax *table, + int table_size,off_t data_size) +{ + minmax total_mm; + + total_mm = accumulate_minmax_stuff(table, table_size); + + print_indent(3); + output_report("%s (%d iteration(s)):\n", name,(int)table_size); + + /* Note: The maximum throughput uses the minimum amount of time & vice versa */ + + print_indent(4); + output_report("Maximum Throughput: %6.2f MB/s", MB_PER_SEC(data_size,total_mm.min)); + if(opts->print_times) + output_report(" (%7.3f s)\n", total_mm.min); + else + output_report("\n"); + + print_indent(4); + output_report("Average Throughput: %6.2f MB/s", + MB_PER_SEC(data_size,total_mm.sum / total_mm.num)); + if(opts->print_times) + output_report(" (%7.3f s)\n", (total_mm.sum / total_mm.num)); + else + output_report("\n"); + + print_indent(4); + output_report("Minimum Throughput: %6.2f MB/s", MB_PER_SEC(data_size,total_mm.max)); + if(opts->print_times) + output_report(" (%7.3f s)\n", total_mm.max); + else + output_report("\n"); +} + +/* * Function: output_report * Purpose: Print a line of the report. Only do so if I'm the 0 process. 
* Return: Nothing @@ -872,55 +928,142 @@ parse_command_line(int argc, char *argv[]) cl_opts->min_num_procs = 1; cl_opts->max_xfer_size = 1 * ONE_MB; cl_opts->min_xfer_size = 128 * ONE_KB; + cl_opts->print_times = 0; /* Printing times is off by default */ + cl_opts->print_raw = 0; /* Printing raw data throughput is off by default */ + cl_opts->h5_alignment = 1; /* No alignment for HDF5 objects by default */ + cl_opts->h5_threshold = 1; /* No threshold for aligning HDF5 objects by default */ + cl_opts->h5_use_chunks = 0; /* Don't chunk the HDF5 dataset by default */ + cl_opts->h5_no_fill = 0; /* Write fill values by default */ while ((opt = get_option(argc, (const char **)argv, s_opts, l_opts)) != EOF) { switch ((char)opt) { + case 'a': + cl_opts->h5_alignment = parse_size_directive(opt_arg); + break; + case 'A': + cl_opts->io_types &= ~0x7; + + { + const char *end = opt_arg; + + while (end && *end != '\0') { + char buf[10]; + int i; + + memset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (isalnum(*end) && i < 10) + buf[i++] = *end; + + if (!strcasecmp(buf, "phdf5")) { + cl_opts->io_types |= PIO_HDF5; + } else if (!strcasecmp(buf, "mpiio")) { + cl_opts->io_types |= PIO_MPI; + } else if (!strcasecmp(buf, "posix")) { + cl_opts->io_types |= PIO_POSIX; + } else { + fprintf(stderr, "pio_perf: invalid --api option %s\n", + buf); + exit(1); + } + + if (*end == '\0') + break; + + end++; + } + } + + break; #if 0 case 'b': /* the future "binary" option */ break; #endif /* 0 */ + case 'c': /* Turn on chunked HDF5 dataset creation */ + cl_opts->h5_use_chunks = 1; + break; case 'd': - cl_opts->num_dsets = strtol(opt_arg, NULL, 10); + cl_opts->num_dsets = atoi(opt_arg); break; case 'D': - pio_debug_level = strtol(opt_arg, NULL, 10); - - if (pio_debug_level > 4) - pio_debug_level = 4; - else if (pio_debug_level < 0) - pio_debug_level = 0; + { + const char *end = opt_arg; + + while (end && *end != '\0') { + char buf[10]; + int i; + + memset(buf, 
'\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (isalnum(*end) && i < 10) + buf[i++] = *end; + + if (strlen(buf) > 1 || isdigit(buf[0])) { + register int i; + + for (i = 0; i < 10 && buf[i] != '\0'; ++i) + if (!isdigit(buf[i])) { + fprintf(stderr, "pio_perf: invalid --debug option %s\n", + buf); + exit(1); + } + + pio_debug_level = atoi(buf); + + if (pio_debug_level > 4) + pio_debug_level = 4; + else if (pio_debug_level < 0) + pio_debug_level = 0; + } else { + switch (*buf) { + case 'r': + /* Turn on raw data throughput info */ + cl_opts->print_raw = 1; + break; + case 't': + /* Turn on time printing */ + cl_opts->print_times = 1; + break; + default: + fprintf(stderr, "pio_perf: invalid --debug option %s\n", buf); + exit(1); + } + } + + if (*end == '\0') + break; + + end++; + } + } break; case 'f': cl_opts->file_size = parse_size_directive(opt_arg); break; case 'F': - cl_opts->num_files = strtol(opt_arg, NULL, 10); - break; - case 'H': - cl_opts->io_types &= ~0x7; - cl_opts->io_types |= PIO_HDF5; + cl_opts->num_files = atoi(opt_arg); break; case 'i': - cl_opts->num_iters = strtol(opt_arg, NULL, 10); + cl_opts->num_iters = atoi(opt_arg); break; - case 'm': - cl_opts->io_types &= ~0x7; - cl_opts->io_types |= PIO_MPI; + case 'n': /* Turn off writing fill values */ + cl_opts->h5_no_fill = 1; break; case 'o': cl_opts->output_file = opt_arg; break; case 'p': - cl_opts->min_num_procs = strtol(opt_arg, NULL, 10); + cl_opts->min_num_procs = atoi(opt_arg); break; case 'P': - cl_opts->max_num_procs = strtol(opt_arg, NULL, 10); + cl_opts->max_num_procs = atoi(opt_arg); break; - case 'r': - cl_opts->io_types &= ~0x7; - cl_opts->io_types |= PIO_RAW; + case 'T': + cl_opts->h5_threshold = parse_size_directive(opt_arg); break; case 'x': cl_opts->min_xfer_size = parse_size_directive(opt_arg); @@ -1005,41 +1148,58 @@ usage(const char *prog) if (myrank == 0) { fflush(stdout); - fprintf(stdout, "usage: %s [OPTIONS]\n", prog); - fprintf(stdout, " 
OPTIONS\n"); - fprintf(stdout, " -h, --help Print a usage message and exit\n"); - fprintf(stdout, " -d N, --num-dsets=N Number of datasets per file [default:1]\n"); - fprintf(stdout, " -D N, --debug=N Indicate the debugging level [default:0]\n"); - fprintf(stdout, " -f S, --file-size=S Size of a single file [default: 64M]\n"); - fprintf(stdout, " -F N, --num-files=N Number of files [default: 1]\n"); - fprintf(stdout, " -H, --hdf5 Run HDF5 performance test\n"); - fprintf(stdout, " -i, --num-iterations Number of iterations to perform [default: 1]\n"); - fprintf(stdout, " -m, --mpiio Run MPI/IO performance test\n"); - fprintf(stdout, " -o F, --output=F Output raw data into file F [default: none]\n"); - fprintf(stdout, " -P N, --max-num-processes=N Maximum number of processes to use [default: all MPI_COMM_WORLD processes ]\n"); - fprintf(stdout, " -p N, --min-num-processes=N Minimum number of processes to use [default: 1]\n"); - fprintf(stdout, " -r, --raw Run raw (UNIX) performance test\n"); - fprintf(stdout, " -X S, --max-xfer-size=S Maximum transfer buffer size [default: 1M]\n"); - fprintf(stdout, " -x S, --min-xfer-size=S Minimum transfer buffer size [default: 128K]\n"); - fprintf(stdout, "\n"); - fprintf(stdout, " F - is a filename.\n"); - fprintf(stdout, " N - is an integer >=0.\n"); - fprintf(stdout, " S - is a size specifier, an integer >=0 followed by a size indicator:\n"); - fprintf(stdout, "\n"); - fprintf(stdout, " K - Kilobyte\n"); - fprintf(stdout, " M - Megabyte\n"); - fprintf(stdout, " G - Gigabyte\n"); - fprintf(stdout, "\n"); - fprintf(stdout, " Example: 37M = 37 Megabytes\n"); - fprintf(stdout, "\n"); - fprintf(stdout, " Debugging levels are:\n"); - fprintf(stdout, "\n"); - fprintf(stdout, " 0 - None\n"); - fprintf(stdout, " 1 - Minimal\n"); - fprintf(stdout, " 2 - Not quite everything\n"); - fprintf(stdout, " 3 - Everything\n"); - fprintf(stdout, " 4 - Everything and the kitchen sink\n"); - fprintf(stdout, "\n"); + printf("usage: %s [OPTIONS]\n", 
prog); + printf(" OPTIONS\n"); + printf(" -h, --help Print a usage message and exit\n"); + printf(" -a S, --align=S Alignment of objects in HDF5 file [default: 1]\n"); + printf(" -A AL, --api=AL Which APIs to test [default: all of them]\n"); +#if 0 + printf(" -b, --binary The elusive binary option\n"); +#endif /* 0 */ + printf(" -c, --chunk Create HDF5 datasets chunked [default: off]\n"); + printf(" -d N, --num-dsets=N Number of datasets per file [default:1]\n"); + printf(" -D DL, --debug=DL Indicate the debugging level\n"); + printf(" [default: no debugging]\n"); + printf(" -f S, --file-size=S Size of a single file [default: 64M]\n"); + printf(" -F N, --num-files=N Number of files [default: 1]\n"); + printf(" -i, --num-iterations Number of iterations to perform [default: 1]\n"); + printf(" -n, --no-fill Don't write fill values to HDF5 dataset\n"); + printf(" [default: off (i.e. write fill values)]\n"); + printf(" -o F, --output=F Output raw data into file F [default: none]\n"); + printf(" -P N, --max-num-processes=N Maximum number of processes to use\n"); + printf(" [default: all MPI_COMM_WORLD processes ]\n"); + printf(" -p N, --min-num-processes=N Minimum number of processes to use [default: 1]\n"); + printf(" -T S, --threshold=S Threshold for alignment of objects in HDF5 file\n"); + printf(" [default: 1]\n"); + printf(" -X S, --max-xfer-size=S Maximum transfer buffer size [default: 1M]\n"); + printf(" -x S, --min-xfer-size=S Minimum transfer buffer size [default: 128K]\n"); + printf("\n"); + printf(" F - is a filename.\n"); + printf(" N - is an integer >=0.\n"); + printf(" S - is a size specifier, an integer >=0 followed by a size indicator:\n"); + printf(" K - Kilobyte\n"); + printf(" M - Megabyte\n"); + printf(" G - Gigabyte\n"); + printf("\n"); + printf(" Example: 37M = 37 Megabytes\n"); + printf("\n"); + printf(" AL - is an API list. 
Valid values are:\n"); + printf(" phdf5 - Parallel HDF5\n"); + printf(" mpiio - MPI-I/O\n"); + printf(" posix - POSIX\n"); + printf("\n"); + printf(" Example: --api=mpiio,phdf5\n"); + printf("\n"); + printf(" DL - is a list of debugging flags. Valid values are:\n"); + printf(" 1 - Minimal\n"); + printf(" 2 - Not quite everything\n"); + printf(" 3 - Everything\n"); + printf(" 4 - Everything and the kitchen sink\n"); + printf(" r - Raw data I/O throughput information\n"); + printf(" t - Times as well as throughputs\n"); + printf("\n"); + printf(" Example: --debug=2,r,t\n"); + printf("\n"); fflush(stdout); } } diff --git a/perform/pio_perf.h b/perform/pio_perf.h index bea92f4..89aa879 100644 --- a/perform/pio_perf.h +++ b/perform/pio_perf.h @@ -11,7 +11,7 @@ #include "H5private.h" typedef enum iotype_ { - RAWIO, + POSIXIO, MPIO, PHDF5 /*NUM_TYPES*/ @@ -22,9 +22,13 @@ typedef struct parameters_ { int num_procs; /* Maximum number of processes to use */ int num_files; /* Number of files to create */ long num_dsets; /* Number of datasets to create */ - long num_elmts; /* Number of native ints in each dset */ + off_t num_elmts; /* Number of native ints in each dset */ int num_iters; /* Number of times to loop doing the IO */ size_t buf_size; /* Buffer size */ + hsize_t h5_align; /* HDF5 object alignment */ + hsize_t h5_thresh; /* HDF5 object alignment threshold */ + unsigned h5_use_chunks; /* Make HDF5 dataset chunked */ + unsigned h5_no_fill; /* Disable HDF5 writing fill values */ } parameters; typedef struct results_ { diff --git a/perform/pio_timer.c b/perform/pio_timer.c index d053e10..588ea1d 100644 --- a/perform/pio_timer.c +++ b/perform/pio_timer.c @@ -41,15 +41,12 @@ pio_time *timer_g; /* timer: global for stub functions */ * Modifications: */ pio_time * -pio_time_new(unsigned int type) +pio_time_new(clock_type type) { pio_time *pt = (pio_time *)calloc(1, sizeof(struct pio_time_)); - register int i; /* set global timer variable */ timer_g = pt; - for (i = 0; i < 
NUM_TIMERS; ++i) - pt->total_time[i] = 0.0; pt->type = type; return pt; @@ -82,7 +79,7 @@ pio_time_destroy(pio_time *pt) * Modifications: */ void -set_timer_type(pio_time *pt, timer_type type) +set_timer_type(pio_time *pt, clock_type type) { pt->type = type; } @@ -94,7 +91,7 @@ set_timer_type(pio_time *pt, timer_type type) * Programmer: Bill Wendling, 04. October 2001 * Modifications: */ -timer_type +clock_type get_timer_type(pio_time *pt) { return pt->type; @@ -131,47 +128,53 @@ set_time(pio_time *pt, timer_type t, int start_stop) ((double)pt->sys_timer[t].tv_usec) / MICROSECOND); } } - } - if (pio_debug_level >= 4) { - char *msg; - int myrank; - - MPI_Comm_rank(pio_comm_g, &myrank); - - switch (t) { - case HDF5_FILE_OPENCLOSE: - msg = "File Open/Close"; - break; - case HDF5_DATASET_CREATE: - msg = "Dataset Create"; - break; - case HDF5_MPI_WRITE: - msg = "MPI Write"; - break; - case HDF5_MPI_READ: - msg = "MPI Read"; - break; - case HDF5_FINE_WRITE_FIXED_DIMS: - msg = "Fine Write"; - break; - case HDF5_FINE_READ_FIXED_DIMS: - msg = "Fine Read"; - break; - case HDF5_GROSS_WRITE_FIXED_DIMS: - msg = "Gross Write"; - break; - case HDF5_GROSS_READ_FIXED_DIMS: - msg = "Gross Read"; - break; - default: - msg = "Unknown Timer"; - break; - } + if (pio_debug_level >= 4) { + char *msg; + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + switch (t) { + case HDF5_FILE_OPENCLOSE: + msg = "File Open/Close"; + break; + case HDF5_DATASET_CREATE: + msg = "Dataset Create"; + break; + case HDF5_MPI_WRITE: + msg = "MPI Write"; + break; + case HDF5_MPI_READ: + msg = "MPI Read"; + break; + case HDF5_FINE_WRITE_FIXED_DIMS: + msg = "Fine Write"; + break; + case HDF5_FINE_READ_FIXED_DIMS: + msg = "Fine Read"; + break; + case HDF5_GROSS_WRITE_FIXED_DIMS: + msg = "Gross Write"; + break; + case HDF5_GROSS_READ_FIXED_DIMS: + msg = "Gross Read"; + break; + case HDF5_RAW_WRITE_FIXED_DIMS: + msg = "Raw Write"; + break; + case HDF5_RAW_READ_FIXED_DIMS: + msg = "Raw Read"; + break; + 
default: + msg = "Unknown Timer"; + break; + } - fprintf(output, " Proc %d: %s %s: %.2f\n", myrank, msg, - (start_stop == START ? "Start" : "Stop"), - pt->total_time[t]); + fprintf(output, " Proc %d: %s %s: %.2f\n", myrank, msg, + (start_stop == START ? "Start" : "Stop"), + pt->total_time[t]); + } } return pt; diff --git a/perform/pio_timer.h b/perform/pio_timer.h index 4144825..81916dd 100644 --- a/perform/pio_timer.h +++ b/perform/pio_timer.h @@ -28,21 +28,25 @@ typedef enum timer_type_ { HDF5_FINE_READ_FIXED_DIMS, HDF5_GROSS_WRITE_FIXED_DIMS, HDF5_GROSS_READ_FIXED_DIMS, + HDF5_RAW_WRITE_FIXED_DIMS, + HDF5_RAW_READ_FIXED_DIMS, NUM_TIMERS } timer_type; -/* Miscellaneous identifiers */ -enum { +typedef enum clock_type_ { MPI_TIMER = 0, /* Use MPI timer to measure time */ - SYS_TIMER = 1, /* Use system clock to measure time */ + SYS_TIMER = 1 /* Use system clock to measure time */ +} clock_type; +/* Miscellaneous identifiers */ +enum { START, /* Start a specified timer */ STOP /* Stop a specified timer */ }; /* The performance time structure */ typedef struct pio_time_ { - unsigned int type : 1; + clock_type type; double total_time[NUM_TIMERS]; double mpi_timer[NUM_TIMERS]; struct timeval sys_timer[NUM_TIMERS]; @@ -52,10 +56,10 @@ typedef struct pio_time_ { #ifdef __cplusplus extern "C" { #endif /* __cplusplus */ -extern pio_time *pio_time_new(unsigned int); +extern pio_time *pio_time_new(clock_type t); extern void pio_time_destroy(pio_time *pt); -extern void set_timer_type(pio_time *pt, timer_type type); -extern timer_type get_timer_type(pio_time *pt); +extern void set_timer_type(pio_time *pt, clock_type type); +extern clock_type get_timer_type(pio_time *pt); extern pio_time *set_time(pio_time *pt, timer_type t, int start_stop); extern double get_time(pio_time *pt, timer_type t); #ifdef __cplusplus |