From bb76b558e6d5c3e1c35a5f08da480435db039e19 Mon Sep 17 00:00:00 2001 From: Albert Cheng Date: Wed, 7 Nov 2001 10:28:33 -0500 Subject: [svn-r4592] Purpose: New addition Description: Initial version of the Parallel I/O performance measurement program. Not fully implemented yet but checking them in before I may destroy them by accident. Don't run this on a small file system (like AFS or eirene) since it generates gigabytes of test files. Platforms tested: modi4 64bits. It compiled and ran but took a long time because the current test parameters are too "wild". --- perform/Dependencies | 74 ++++- perform/Makefile.in | 21 +- perform/pio_engine.c | 832 +++++++++++++++++++++++++++++++++++++++++++++++++++ perform/pio_perf.c | 324 ++++++++++++++++++++ perform/pio_perf.h | 26 ++ perform/pio_timer.c | 137 +++++++++ perform/pio_timer.h | 70 +++++ 7 files changed, 1473 insertions(+), 11 deletions(-) create mode 100644 perform/pio_engine.c create mode 100644 perform/pio_perf.c create mode 100644 perform/pio_perf.h create mode 100644 perform/pio_timer.c create mode 100644 perform/pio_timer.h diff --git a/perform/Dependencies b/perform/Dependencies index 173aa04..bfbe986 100644 --- a/perform/Dependencies +++ b/perform/Dependencies @@ -38,6 +38,76 @@ iopipe.lo: \ $(top_srcdir)/src/H5FDlog.h \ $(top_srcdir)/src/H5private.h \ $(top_builddir)/src/H5config.h +mpi-perf.lo: \ + $(srcdir)/mpi-perf.c \ + $(top_srcdir)/src/hdf5.h \ + $(top_srcdir)/src/H5public.h \ + $(top_builddir)/src/H5pubconf.h \ + $(top_srcdir)/src/H5api_adpt.h \ + $(top_srcdir)/src/H5Ipublic.h \ + $(top_srcdir)/src/H5Apublic.h \ + $(top_srcdir)/src/H5ACpublic.h \ + $(top_srcdir)/src/H5Bpublic.h \ + $(top_srcdir)/src/H5Dpublic.h \ + $(top_srcdir)/src/H5Epublic.h \ + $(top_srcdir)/src/H5Fpublic.h \ + $(top_srcdir)/src/H5FDpublic.h \ + $(top_srcdir)/src/H5Gpublic.h \ + $(top_srcdir)/src/H5HGpublic.h \ + $(top_srcdir)/src/H5HLpublic.h \ + $(top_srcdir)/src/H5MMpublic.h \ + $(top_srcdir)/src/H5Opublic.h \ + 
$(top_srcdir)/src/H5Ppublic.h \ + $(top_srcdir)/src/H5Zpublic.h \ + $(top_srcdir)/src/H5Rpublic.h \ + $(top_srcdir)/src/H5Spublic.h \ + $(top_srcdir)/src/H5Tpublic.h \ + $(top_srcdir)/src/H5FDcore.h \ + $(top_srcdir)/src/H5FDfamily.h \ + $(top_srcdir)/src/H5FDmpio.h \ + $(top_srcdir)/src/H5FDsec2.h \ + $(top_srcdir)/src/H5FDstdio.h \ + $(top_srcdir)/src/H5FDsrb.h \ + $(top_srcdir)/src/H5FDgass.h \ + $(top_srcdir)/src/H5FDdpss.h \ + $(top_srcdir)/src/H5FDstream.h \ + $(top_srcdir)/src/H5FDmulti.h \ + $(top_srcdir)/src/H5FDlog.h +perf.lo: \ + $(srcdir)/perf.c \ + $(top_srcdir)/src/hdf5.h \ + $(top_srcdir)/src/H5public.h \ + $(top_builddir)/src/H5pubconf.h \ + $(top_srcdir)/src/H5api_adpt.h \ + $(top_srcdir)/src/H5Ipublic.h \ + $(top_srcdir)/src/H5Apublic.h \ + $(top_srcdir)/src/H5ACpublic.h \ + $(top_srcdir)/src/H5Bpublic.h \ + $(top_srcdir)/src/H5Dpublic.h \ + $(top_srcdir)/src/H5Epublic.h \ + $(top_srcdir)/src/H5Fpublic.h \ + $(top_srcdir)/src/H5FDpublic.h \ + $(top_srcdir)/src/H5Gpublic.h \ + $(top_srcdir)/src/H5HGpublic.h \ + $(top_srcdir)/src/H5HLpublic.h \ + $(top_srcdir)/src/H5MMpublic.h \ + $(top_srcdir)/src/H5Opublic.h \ + $(top_srcdir)/src/H5Ppublic.h \ + $(top_srcdir)/src/H5Zpublic.h \ + $(top_srcdir)/src/H5Rpublic.h \ + $(top_srcdir)/src/H5Spublic.h \ + $(top_srcdir)/src/H5Tpublic.h \ + $(top_srcdir)/src/H5FDcore.h \ + $(top_srcdir)/src/H5FDfamily.h \ + $(top_srcdir)/src/H5FDmpio.h \ + $(top_srcdir)/src/H5FDsec2.h \ + $(top_srcdir)/src/H5FDstdio.h \ + $(top_srcdir)/src/H5FDsrb.h \ + $(top_srcdir)/src/H5FDgass.h \ + $(top_srcdir)/src/H5FDdpss.h \ + $(top_srcdir)/src/H5FDstream.h \ + $(top_srcdir)/src/H5FDmulti.h \ + $(top_srcdir)/src/H5FDlog.h chunk.lo: \ $(srcdir)/chunk.c \ $(top_srcdir)/src/hdf5.h \ @@ -108,7 +178,3 @@ overhead.lo: \ $(top_srcdir)/src/H5FDstream.h \ $(top_srcdir)/src/H5FDmulti.h \ $(top_srcdir)/src/H5FDlog.h -mpi-perf.lo: \ - $(srcdir)/mpi-perf.c -perf.lo: \ - $(srcdir)/perf.c diff --git a/perform/Makefile.in b/perform/Makefile.in index 
31c6149..d1b967e 100644 --- a/perform/Makefile.in +++ b/perform/Makefile.in @@ -1,7 +1,8 @@ ## HDF5 Library Performance Makefile(.in) ## -## Copyright (C) 2001 National Center for Supercomputing Applications. -## All rights reserved. +## Copyright (C) 2001 +## National Center for Supercomputing Applications. +## All rights reserved. ## top_srcdir=@top_srcdir@ top_builddir=.. @@ -10,13 +11,14 @@ srcdir=@srcdir@ ## Add include directory to the C preprocessor flags and the hdf5 library ## to the library list. -CPPFLAGS=-I. -I$(srcdir) -I../src -I$(top_srcdir)/src @CPPFLAGS@ +CPPFLAGS=-I. -I$(srcdir) -I../src -I$(top_srcdir)/src -I$(top_srcdir)/tools/lib @CPPFLAGS@ LIBHDF5=../src/libhdf5.la LIBH5TEST=../test/libh5test.la +LIBH5TOOLS=../tools/lib/libh5tools.la ## These are the programs that `make all' or `make tests' will build and which ## `make check' will run. List them in the order they should be run. -TEST_PROGS_PARA=mpi-perf perf +TEST_PROGS_PARA=mpi-perf perf pio_perf TEST_PROGS=iopipe chunk overhead ## These are the files that `make clean' (and derivatives) will remove from @@ -26,14 +28,19 @@ CLEAN=*.h5 *.raw *.dat x-gnuplot ## List all source files here. The list of object files will be ## created by replacing the `.c' with a `.o'. This list is necessary ## for building automatic dependencies. -TEST_SRC_PARA=mpi-perf.c perf.c -TEST_SRC=iopipe.c chunk.c overhead.c \ - $(TEST_SRC_PARA) +PIO_PERF_SRC=pio_perf.c pio_engine.c pio_timer.c +TEST_SRC_PARA=mpi-perf.c perf.c $(PIO_PERF_SRC) +TEST_SRC=iopipe.c chunk.c overhead.c $(TEST_SRC_PARA) +PIO_PERF_OBJ=$(PIO_PERF_SRC:.c=.lo) TEST_OBJ=$(TEST_SRC:.c=.lo) ## How to build the programs... 
they all depend on the hdf5 library $(TEST_PROGS) $(TEST_PROGS_PARA): $(LIBHDF5) +pio_perf: $(PIO_PERF_OBJ) + @$(LT_LINK_EXE) $(CFLAGS) -o $@ $(PIO_PERF_OBJ) $(LIBH5TEST) \ + $(LIBHDF5) $(LDFLAGS) $(LIBH5TOOLS) $(LIBS) + perf: perf.lo @$(LT_LINK_EXE) $(CFLAGS) -o $@ perf.lo $(LIBH5TEST) $(LIBHDF5) \ $(LDFLAGS) $(LIBS) diff --git a/perform/pio_engine.c b/perform/pio_engine.c new file mode 100644 index 0000000..f54114c --- /dev/null +++ b/perform/pio_engine.c @@ -0,0 +1,832 @@ +/* + * Author: Albert Cheng of NCSA, Oct 24, 2001. + */ + +#include "hdf5.h" +#ifdef H5_HAVE_PARALLEL +#include +#include +#include +#include "pio_perf.h" +#include "pio_timer.h" +#ifdef OLDSTUFF +#include +#include +#include +#include +#endif /* OLDSTUFF */ +#include +#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ +# include +#endif + + +/* Macro definitions */ +#define GOTOERROR(errcode) {ret_code=errcode; goto done;} +#define GOTODONE {goto done;} +#define ERRMSG(mesg) { \ + fprintf(stderr, "Proc %d: ", myrank); \ + fprintf(stderr, "*** Assertion failed (%s) at line %4d in %s\n", \ + mesg, (int)__LINE__, __FILE__); \ +} +#define MSG(mesg) { \ + fprintf(stderr, "Proc %d: ", myrank); \ + fprintf(stderr, "(%s) at line %4d in %s\n", \ + mesg, (int)__LINE__, __FILE__); \ +} +/* Verify: + * if val is false (0), print mesg. 
+ */ +#define VRFY(val, mesg) do { \ + if (!val) { \ + ERRMSG(mesg); \ + GOTOERROR(1); \ + } \ +} while(0) + +#ifndef HDmalloc +#define HDmalloc(x) malloc(x) +#endif + +#ifndef HDfree +#define HDfree(x) free(x) +#endif + +#ifndef HDopen +#ifdef O_BINARY +#define HDopen(S,F,M) open(S,F|_O_BINARY,M) +#else +#define HDopen(S,F,M) open(S,F,M) +#endif +#endif + +#ifndef HDclose +#define HDclose(F) close(F) +#endif + +#ifndef HDseek +#define HDseek(F,L,W) lseek(F,L,W) +#endif + +#ifndef HDwrite +#define HDwrite(F,B,S) write(F,B,S) +#endif + +#ifndef HDread +#define HDread(F,B,S) read(F,B,S) +#endif + +/* Raw I/O macros */ +#define RAWCREATE(fn) HDopen(fn, O_CREAT|O_TRUNC|O_RDWR, 0600) +#define RAWOPEN(fn, F) HDopen(fn, F, 0600) +#define RAWCLOSE(F) HDclose(F) +#define RAWSEEK(F,L) HDseek(F,L,SEEK_SET) +#define RAWWRITE(F,B,S) HDwrite(F,B,S) +#define RAWREAD(F,B,S) HDread(F,B,S) + + + +#ifdef OLDSTUFF +hid_t dataset; /* Dataset ID */ +char *meta_ext, *raw_ext; /* holds the meta and raw file extension if */ + /* opt_split_vfd is set */ + + +/* DEFAULT VALUES FOR OPTIONS */ +int64_t opt_block = 1048576*16; +int opt_iter = 1; +int opt_stripe = -1; +int opt_correct = 0; +int amode = O_RDWR | O_CREAT; +char opt_file[256] = "/tmp/test.out\0"; +char opt_pvfstab[256] = "notset\0"; +int opt_pvfstab_set = 0; + +/* function prototypes */ +double Wtime(void); + +extern int errno; +extern int debug_on; + +/* globals needed for getopt */ +extern char *optarg; +extern int optind, opterr; +#endif /* old stuff */ + +int +dopio(parameters param) +{ + MPI_Comm comm = MPI_COMM_NULL; + int myrank; + int nprocs=1; + int ret_code=0; /* return code */ + iotype iot; + char fname[256]; + unsigned int maxprocs; + unsigned int nfiles, nf; + unsigned long ndsets, nd; + unsigned long nelmts; + unsigned int niters; + unsigned long nelmts_towrite, nelmts_written; + unsigned long nelmts_toread, nelmts_read; + size_t elmt_size; + off_t dset_offset; /*dataset offset in a file */ + off_t next_offset; 
/*offset of next I/O */ + int rc; /*routine return code */ + int color; /*for communicator creation */ + int mrc; /*mpi return code */ + char *buffer=NULL; /*data buffer pointer */ + size_t buffer_size=1024*1024; /*data buffer size, 1MB */ + size_t nelmts_in_buffer; /*Number of elmts the buffer*/ + /*can hold.*/ + size_t dset_size; /*one dataset size in Byte*/ + int rawfd=-1; /*Raw IO file handle */ + MPI_File mpifd=MPI_FILE_NULL; /*MPI IO file handle */ + /* hdf5 variables */ + hid_t acc_tpl=-1; /* File access templates */ + hid_t h5fd=-1; /*HDF5 IO file handle */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[1]; /* dataset dim sizes */ + hsize_t h5block[1], h5stride[1], h5count[1]; + hssize_t h5start[1]; + hid_t h5dset_space_id = -1; /*Dataset space ID */ + hid_t h5mem_space_id = -1; /*memory dataspace ID */ + char dname[64]; /*dataset name */ + hid_t h5ds_id = -1; /*Dataset handle*/ + +#ifdef OLDSTUFF + char *buf, *tmp, *buf2, *tmp2, *check; + int i, j, myrank=0, nprocs=1, err, my_correct = 1, correct, myerrno; + double stim, etim; + double write_tim = 0; + double read_tim = 0; + double read_bw, write_bw; + double max_read_tim, max_write_tim; + double min_read_tim, min_write_tim; + double ave_read_tim, ave_write_tim; + int64_t iter_jump = 0; + int64_t seek_position = 0; + MPI_File fh; + MPI_Status status; + int nchars; +#endif /* OLDSTUFF */ + + + /* + *Setup parameters and sanity check + */ + /* IO type */ + iot = param.io_type; + switch (iot) { + case RAW: + /* nothing */ + break; + case MPIO: + break; + case PHDF5: + break; + default: + /* unknown request */ + fprintf(stderr, "Unknown IO type request (%d)\n", iot); + GOTOERROR(1); + } + + /* number of files */ + nfiles = param.num_files; + + /* number of datasets per file */ + ndsets = param.num_dsets; + + /* number of elements per dataset */ + nelmts = param.num_elmts; + if (nelmts == 0 ){ + fprintf(stderr, + "number of elements per dataset must be positive (%lu)\n", + nelmts); + GOTOERROR(1); + } + + 
/* number of iterations of reads or writes */ + niters = param.num_iters; + + /* maximun number of mpi-processes to use */ + maxprocs = param.max_num_procs; + if (maxprocs == 0 ){ + fprintf(stderr, + "maximun number of process to use must be positive (%u)\n", + maxprocs); + GOTOERROR(1); + } + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + if (maxprocs > nprocs){ + fprintf(stderr, + "maximun number of process(%d) must be <= process in MPI_COMM_WORLD(%d)\n", + maxprocs, nprocs); + GOTOERROR(1); + } + + +/* DEBUG*/ +fprintf(stderr, "nfiles=%u\n", nfiles); +fprintf(stderr, "ndsets=%lu\n", ndsets); +fprintf(stderr, "nelmts=%lu\n", nelmts); +fprintf(stderr, "niters=%u\n", niters); +fprintf(stderr, "maxprocs=%u\n", maxprocs); +nfiles=3; +/*ndsets=5; */ + + /* + *Create a sub communicator for this run + *Easier to use the first N processes. + */ + MPI_Comm_rank(comm, &myrank); + color = (myrank < maxprocs); + mrc = MPI_Comm_split (MPI_COMM_WORLD, color, myrank, &comm); + if (mrc!=MPI_SUCCESS) { + fprintf(stderr, "MPI_Comm_split failed\n"); + GOTOERROR(1); + } + + if (!color){ /* not involved in this run */ + mrc = MPI_Comm_free(&comm); + GOTODONE; + } + + /* determine the mpi rank of in the new comm */ + MPI_Comm_size(comm, &nprocs); + MPI_Comm_rank(comm, &myrank); + + /* Calculate dataset parameters */ + /* Data type is always native C int */ + elmt_size = sizeof(int); + dset_size = nelmts * elmt_size; + + /* allocate data buffer */ + buffer = HDmalloc(buffer_size); + if (buffer == NULL){ + fprintf(stderr, "malloc for data buffer failed\n"); + GOTOERROR(1); + } + nelmts_in_buffer = buffer_size/elmt_size; + + /* hdf5 dataset setup */ + if (iot == PHDF5){ + /* define a contiquous dataset of nelmts native ints */ + h5dims[0] = nelmts; + h5dset_space_id = H5Screate_simple (1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* create the memory dataspace */ + h5dims[0] = nelmts_in_buffer; + h5mem_space_id = H5Screate_simple (1, h5dims, NULL); + 
VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } + + + + + + for (nf=1; nf <= nfiles; nf++){ + /* + *Open file for write + */ +MSG("creating file"); + sprintf(fname, "#pio_tmp_%u", nf); + switch (iot) { + case RAW: + strcat(fname, ".raw"); + rawfd = RAWCREATE(fname); + if (rawfd < 0 ){ + fprintf(stderr, "Raw File Create failed(%s)\n", fname); + GOTOERROR(1); + } + break; + case MPIO: + strcat(fname, ".mpio"); + mrc = MPI_File_open(comm, fname, MPI_MODE_CREATE|MPI_MODE_RDWR, + MPI_INFO_NULL, &mpifd); + if (mrc != MPI_SUCCESS){ + fprintf(stderr, "MPI File Create failed(%s)\n", fname); + GOTOERROR(1); + } + break; + case PHDF5: + strcat(fname, ".h5"); + acc_tpl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), ""); + hrc = H5Pset_fapl_mpio(acc_tpl, comm, MPI_INFO_NULL); + VRFY((hrc >= 0), ""); + /*do not close acc_tpl. It will used to open file for read later*/ + /* create the parallel file */ + h5fd=H5Fcreate(fname,H5F_ACC_TRUNC,H5P_DEFAULT,acc_tpl); + if (h5fd < 0){ + fprintf(stderr, "HDF5 file Create failed(%s)\n", fname); + GOTOERROR(1); + } + break; + } + + for (nd=1; nd <= ndsets; nd++){ + /* Calculate dataset offset within a file */ + + /* create dataset */ + switch (iot){ + case RAW: + case MPIO: + /* both raw and mpi io just need dataset offset in file*/ + dset_offset = (nd-1)*dset_size; + break; + case PHDF5: + sprintf(dname, "Dataset_%lu", nd); + h5ds_id = H5Dcreate(h5fd, dname, H5T_NATIVE_INT, h5dset_space_id, + H5P_DEFAULT); + VRFY((h5ds_id >= 0), "H5Dcreate"); + break; + } + + + nelmts_written = 0 ; + while (nelmts_written < nelmts){ + nelmts_towrite = nelmts - nelmts_written; + if (nelmts - nelmts_written >= nelmts_in_buffer){ + nelmts_towrite = nelmts_in_buffer; + }else{ + /* last write of a partial buffer */ + nelmts_towrite = nelmts - nelmts_written; + } + + /*Prepare write data*/ + { + int *intptr = (int*)buffer; + int i; + for (i=0; i=0), "HDF5 Dataset Close failed\n"); + h5ds_id = -1; + } + } + + /* Close file for write */ +MSG("closing 
write file"); + switch (iot) { + case RAW: + rc = RAWCLOSE(rawfd); + VRFY((rc==0), "HDclose"); + rawfd = -1; + break; + case MPIO: + mrc = MPI_File_close(&mpifd); + if (mrc != MPI_SUCCESS){ + fprintf(stderr, "MPI File close failed\n"); + GOTOERROR(1); + } + break; + case PHDF5: + hrc=H5Fclose(h5fd); + if (hrc < 0){ + fprintf(stderr, "HDF5 File Close failed(%s)\n", fname); + GOTOERROR(1); + }else{ + h5fd=-1; + } + break; + } + + + + + /* Open file for read */ +MSG("opening file to read"); + switch (iot) { + case RAW: + rawfd = RAWOPEN(fname, O_RDONLY); + if (rawfd < 0 ){ + fprintf(stderr, "Raw File Open failed(%s)\n", fname); + GOTOERROR(1); + } + break; + case MPIO: + mrc = MPI_File_open(comm, fname, MPI_MODE_RDONLY, + MPI_INFO_NULL, &mpifd); + if (mrc != MPI_SUCCESS){ + fprintf(stderr, "MPI File Open failed(%s)\n", fname); + GOTOERROR(1); + } + break; + case PHDF5: + /* open the parallel file */ + h5fd=H5Fopen(fname,H5P_DEFAULT,acc_tpl); + if (h5fd < 0){ + fprintf(stderr, "HDF5 file Open failed(%s)\n", fname); + GOTOERROR(1); + } + /* can release acc_tpl now */ + hrc = H5Pclose(acc_tpl); + VRFY((hrc >= 0), "H5Pclose"); + acc_tpl = -1; + break; + } + + /* Calculate dataset offset within a file */ + + /* Open dataset for read */ + + /* Prepare read */ + + /* Calculate offset of read within a dataset/file */ + + /* Read */ + + /* Calculate read time */ + + /* Close dataset for read */ + + /* Close file for read */ +MSG("closing read file"); + switch (iot) { + case RAW: + rc = RAWCLOSE(rawfd); + VRFY((rc==0), "HDclose"); + rawfd = -1; + break; + case MPIO: + mrc = MPI_File_close(&mpifd); + if (mrc != MPI_SUCCESS){ + fprintf(stderr, "MPI File close failed\n"); + GOTOERROR(1); + } + break; + case PHDF5: + hrc=H5Fclose(h5fd); + if (h5fd < 0){ + fprintf(stderr, "HDF5 file Create failed(%s)\n", fname); + GOTOERROR(1); + }else{ + h5fd=-1; + } + break; + } + + } + +done: + /* clean up */ + /* release HDF5 objects */ + if (acc_tpl != -1){ + hrc = H5Pclose(acc_tpl); + if (hrc 
< 0){ + fprintf(stderr, "HDF5 Property List Close failed\n"); + ret_code=1; + } + else + acc_tpl = -1; + } + if (h5dset_space_id != -1){ + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0){ + fprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code=1; + } + else + h5dset_space_id = -1; + } + if (h5mem_space_id != -1){ + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0){ + fprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code=1; + } + else + h5mem_space_id = -1; + } + if (h5ds_id != -1){ + hrc = H5Dclose(h5ds_id); + if (hrc < 0){ + fprintf(stderr, "HDF5 Dataset Close failed\n"); + ret_code=1; + } + else + h5ds_id = -1; + } + + /* close any opened files */ + if (rawfd != -1){ + rc = HDclose(rawfd); + if (rc != 0){ + ERRMSG("Raw file close failed"); + ret_code=1; + } + else + rawfd = -1; + } + if (mpifd != MPI_FILE_NULL){ + MPI_File_close(&mpifd); + } + if (h5fd != -1){ + H5Fclose(h5fd); + h5fd=-1; + } + /* release MPI resources */ + if (comm != MPI_COMM_NULL){ + MPI_Comm_free(&comm); + } + /* release generic resources */ + if (buffer != NULL){ + HDfree(buffer); + buffer = NULL; + } +fprintf(stderr, "returning with ret_code=%d\n", ret_code); + return(ret_code); +} + +#ifdef OLDSTUFF +int +original_main() +{ + + /* parse the command line arguments */ + parse_args(argc, argv); + + if (mynod == 0) printf("# Using hdf5-io calls.\n"); + + + /* kindof a weird hack- if the location of the pvfstab file was + * specified on the command line, then spit out this location into + * the appropriate environment variable: */ + +#if H5_HAVE_SETENV +/* no setenv or unsetenv */ + if (opt_pvfstab_set) { + if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){ + perror("setenv"); + goto die_jar_jar_die; + } + } +#endif + + /* this is how much of the file data is covered on each iteration of + * the test. 
used to help determine the seek offset on each + * iteration */ + iter_jump = nprocs * opt_block; + + /* setup a buffer of data to write */ + if (!(tmp = (char *) malloc(opt_block + 256))) { + perror("malloc"); + goto die_jar_jar_die; + } + buf = tmp + 128 - (((long)tmp) % 128); /* align buffer */ + + if (opt_correct) { + /* do the same buffer setup for verifiable data */ + if (!(tmp2 = (char *) malloc(opt_block + 256))) { + perror("malloc2"); + goto die_jar_jar_die; + } + buf2 = tmp + 128 - (((long)tmp) % 128); + } + + /* setup file access template with parallel IO access. */ + if (opt_split_vfd){ + hid_t mpio_pl; + + mpio_pl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "", H5FATAL); + ret = H5Pset_fapl_mpio(mpio_pl, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((ret >= 0), "", H5FATAL); + + /* set optional allocation alignment */ + if (opt_alignment*opt_threshold != 1){ + ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment ); + VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL); + } + + /* setup file access template */ + acc_tpl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "", H5FATAL); + ret = H5Pset_fapl_split(acc_tpl, meta_ext, mpio_pl, raw_ext, mpio_pl); + VRFY((ret >= 0), "H5Pset_fapl_split succeeded", H5FATAL); + }else{ + + /* set optional allocation alignment */ + if (opt_alignment*opt_threshold != 1){ + ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment ); + VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL); + } + } + + + + /* now each process writes a block of opt_block chars in round robbin + * fashion until the whole dataset is covered. 
+ */ + for (j=0; j < opt_iter; j++) { + /* setup a file dataspace selection */ + start[0] = (j*iter_jump)+(mynod*opt_block); + stride[0] = block[0] = opt_block; + count[0]= 1; + ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL); + + if (opt_correct) /* fill in buffer for iteration */ { + for (i=mynod+j, check=buf; i= 0), "H5Dwrite dataset1 succeeded", !H5FATAL); + + /* discover the ending time of the operation */ + etim = MPI_Wtime(); + + write_tim += (etim - stim); + + /* we are done with this "write" iteration */ + } + + /* close dataset and file */ + ret=H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL); + ret=H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL); + + + + /* wait for everyone to synchronize at this point */ + MPI_Barrier(MPI_COMM_WORLD); + + /* reopen the file for reading */ + fid=H5Fopen(opt_file,H5F_ACC_RDONLY,acc_tpl); + VRFY((fid >= 0), "", H5FATAL); + + /* open the dataset */ + dataset = H5Dopen(fid, "Dataset1"); + VRFY((dataset >= 0), "H5Dopen succeeded", H5FATAL); + + /* we can re-use the same mem_dataspace and file_dataspace + * the H5Dwrite used since the dimension size is the same. 
+ */ + + /* we are going to repeat the read the same pattern the write used */ + for (j=0; j < opt_iter; j++) { + /* setup a file dataspace selection */ + start[0] = (j*iter_jump)+(mynod*opt_block); + stride[0] = block[0] = opt_block; + count[0]= 1; + ret=H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL); + /* seek to the appropriate spot give the current iteration and + * rank within the MPI processes */ + + /* discover the start time */ + MPI_Barrier(MPI_COMM_WORLD); + stim = MPI_Wtime(); + + /* read data */ + /* read in the file data */ + if (!opt_correct){ + ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, + H5P_DEFAULT, buf); + } + else{ + ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, + H5P_DEFAULT, buf2); + } + myerrno = errno; + /* discover the end time */ + etim = MPI_Wtime(); + read_tim += (etim - stim); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL); + + + if (ret < 0) fprintf(stderr, "node %d, read error, loc = %Ld: %s\n", + mynod, mynod*opt_block, strerror(myerrno)); + + /* if the user wanted to check correctness, compare the write + * buffer to the read buffer */ + if (opt_correct && memcmp(buf, buf2, opt_block)) { + fprintf(stderr, "node %d, correctness test failed\n", mynod); + my_correct = 0; + MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN, + MPI_COMM_WORLD); + } + + /* we are done with this read iteration */ + } + + /* close dataset and file */ + ret=H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL); + ret=H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL); + + /* compute the read and write times */ + MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX, + MPI_COMM_WORLD); + MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN, + MPI_COMM_WORLD); + MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + + /* 
calculate the average from the sum */ + ave_read_tim = ave_read_tim / nprocs; + + MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX, + MPI_COMM_WORLD); + MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN, + MPI_COMM_WORLD); + MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM, + MPI_COMM_WORLD); + + /* calculate the average from the sum */ + ave_write_tim = ave_write_tim / nprocs; + + /* print out the results on one node */ + if (mynod == 0) { + read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0); + write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0); + + printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs, + opt_iter, (long)opt_block); + + printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter)); + + printf("# Write: min_time = %f, max_time = %f, mean_time = %f\n", + min_write_tim, max_write_tim, ave_write_tim); + printf("# Read: min_time = %f, max_time = %f, mean_time = %f\n", + min_read_tim, max_read_tim, ave_read_tim); + + printf("Write bandwidth = %f Mbytes/sec\n", write_bw); + printf("Read bandwidth = %f Mbytes/sec\n", read_bw); + + if (opt_correct) { + printf("Correctness test %s.\n", correct ? 
"passed" : "failed"); + } + } + + +die_jar_jar_die: + +#if H5_HAVE_SETENV +/* no setenv or unsetenv */ + /* clear the environment variable if it was set earlier */ + if (opt_pvfstab_set){ + unsetenv("PVFSTAB_FILE"); + } +#endif + + free(tmp); + if (opt_correct) free(tmp2); + MPI_Finalize(); + return(0); +} + +/* Wtime() - returns current time in sec., in a double */ +double Wtime() +{ + struct timeval t; + + gettimeofday(&t, NULL); + return((double)t.tv_sec + (double)t.tv_usec / 1000000); +} + +#endif /* OLDSTUFF */ + +#else /* H5_HAVE_PARALLEL */ +/* dummy program since H5_HAVE_PARALLE is not configured in */ +int +main() +{ +printf("No parallel IO performance because parallel is not configured in\n"); +return(0); +} +#endif /* H5_HAVE_PARALLEL */ diff --git a/perform/pio_perf.c b/perform/pio_perf.c new file mode 100644 index 0000000..8971428 --- /dev/null +++ b/perform/pio_perf.c @@ -0,0 +1,324 @@ +/* + * Copyright (C) 2001 + * National Center for Supercomputing Applications + * All rights reserved. + * + */ + +/* + * Parallel HDF5 Performance Testing Code + * -------------------------------------- + * + * Portable code to test performance on the different platforms we support. + * This is what the report should look like: + * + * nprocs = Max#Procs + * IO Type = Raw + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * IO Type = MPIO + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . 
+ *
+ * IO Type = PHDF5
+ * # Files = 1, # of dsets = 1000, Elements per dset = 37000
+ * Write Results = x MB/s
+ * Read Results = x MB/s
+ * # Files = 1, # of dsets = 3000, Elements per dset = 37000
+ * Write Results = x MB/s
+ * Read Results = x MB/s
+ *
+ * . . .
+ *
+ * nprocs = Max#Procs / 2
+ *
+ * . . .
+ *
+ */
+
+/* system header files */
+#include
+#include
+
+/* library header files */
+#include
+
+/* our header files */
+#include "h5tools_utils.h"
+#include "pio_perf.h"
+
+/* useful macros */
+
+/* number of spaces per indentation level in the report */
+#define TAB_SPACE 4
+
+/* one gigabyte, in bytes */
+#define ONE_GB 1073741824UL
+
+/*
+ * Smallest and largest buffer size (in bytes) swept by run_test_loop().
+ * Each expansion is fully parenthesized so that the macros stay correct
+ * when used as an operand of a larger expression.
+ */
+#if 0
+#define MIN_HDF5_BUF_SIZE 1024
+#define MAX_HDF5_BUF_SIZE (ONE_GB / 2)
+#else
+#define MIN_HDF5_BUF_SIZE (1024 * 1024 * 8)
+#define MAX_HDF5_BUF_SIZE (MIN_HDF5_BUF_SIZE * 4)
+#endif
+
+/* local variables */
+
+/* program name used as the prefix of diagnostic and usage messages */
+static const char *progname = "pio_perf";
+
+/*
+ * Command-line options: The user can specify short or long-named
+ * parameters. The long-named ones can be partially spelled. When
+ * adding more, make sure that they don't clash with each other.
+ */
+#if 1
+/*
+ * Short options. `m:' is listed because usage() advertises `-m #' and
+ * parse_command_line() has a case for 'm'; without it only the long
+ * `--max-size' spelling could select that case (assuming get_option()
+ * matches short options against this string getopt-style -- the `o:'
+ * entry follows that convention).
+ */
+static const char *s_opts = "hm:o:";
+#else
+static const char *s_opts = "hbm:o:";
+#endif /* 1 */
+
+/*
+ * Long options. Every accepted abbreviation is spelled out explicitly and
+ * mapped to the same short-option character as the full name.
+ */
+static struct long_options l_opts[] = {
+    { "help", no_arg, 'h' },
+    { "hel", no_arg, 'h' },
+    { "he", no_arg, 'h' },
+#if 0
+    /* a sighting of the elusive binary option */
+    { "binary", no_arg, 'b' },
+    { "binar", no_arg, 'b' },
+    { "bina", no_arg, 'b' },
+    { "bin", no_arg, 'b' },
+    { "bi", no_arg, 'b' },
+#endif /* 0 */
+    { "max-size", require_arg, 'm' },
+    { "max-siz", require_arg, 'm' },
+    { "max-si", require_arg, 'm' },
+    { "max-s", require_arg, 'm' },
+    { "max", require_arg, 'm' },
+    { "ma", require_arg, 'm' },
+    { "output", require_arg, 'o' },
+    { "outpu", require_arg, 'o' },
+    { "outp", require_arg, 'o' },
+    { "out", require_arg, 'o' },
+    { "ou", require_arg, 'o' },
+    { NULL, 0, '\0' }
+};
+
+/* Values collected from the command line by parse_command_line(). */
+struct options {
+    const char *output_file; /* file to print report to */
+    long max_size; /* maximum size of file in gigabytes */
+};
+
+/* local functions */
+static struct options *parse_command_line(int argc, char *argv[]);
+static void run_test_loop(FILE *output, int max_num_procs, long max_size);
+static void print_indent(register FILE *output, register int indent);
+static void usage(const char *prog);
+
+/*
+ * Function: main
+ * Purpose: Start things up. Initialize MPI and then call the test looping
+ *          function.
+ * Return: EXIT_SUCCESS or EXIT_FAILURE
+ * Programmer: Bill Wendling, 30.
October 2001
+ * Modifications:
+ */
+int
+main(int argc, char **argv)
+{
+    int world_size, ret;
+    int exit_value = EXIT_SUCCESS;
+    FILE *output = stdout;      /* report goes to stdout unless -o is given */
+    struct options *opts;
+
+    opts = parse_command_line(argc, argv);
+
+    if (opts->output_file) {
+        if ((output = fopen(opts->output_file, "w")) == NULL) {
+            fprintf(stderr, "%s: cannot open output file\n", progname);
+            perror(opts->output_file);
+            /* not being able to write the report is a failure */
+            exit_value = EXIT_FAILURE;
+            goto onions;
+        }
+    }
+
+    /* initialize MPI and get the maximum num of processors we started with */
+    ret = MPI_Init(&argc, &argv);
+
+    if (ret != MPI_SUCCESS) {
+        /* MPI never came up, so skip MPI_Finalize() as well */
+        fprintf(stderr, "%s: MPI_Init call failed\n", progname);
+        exit_value = EXIT_FAILURE;
+        goto onions;
+    }
+
+    ret = MPI_Comm_size(MPI_COMM_WORLD, &world_size);
+
+    if (ret != MPI_SUCCESS) {
+        fprintf(stderr, "%s: MPI_Comm_size call failed\n", progname);
+
+        if (ret == MPI_ERR_COMM)
+            fprintf(stderr, "invalid MPI communicator\n");
+        else
+            fprintf(stderr, "invalid argument\n");
+
+        exit_value = EXIT_FAILURE;
+        goto cheese_and;
+    }
+
+    run_test_loop(output, world_size, opts->max_size);
+
+cheese_and:
+    MPI_Finalize();
+
+onions:
+    /*
+     * Close the report file we opened ourselves so buffered results are
+     * flushed; OUTPUT is NULL here when the fopen() above failed.
+     */
+    if (output != NULL && output != stdout && fclose(output) != 0) {
+        perror(opts->output_file);
+        exit_value = EXIT_FAILURE;
+    }
+
+    free(opts);
+    return exit_value;
+}
+
+/*
+ * Function: run_test_loop
+ * Purpose: Run the I/O tests. Write the results to OUTPUT.
+ *
+ *          - The slowest changing part of the test is the number of
+ *            processors to use. For each loop iteration, we divide that
+ *            number by 2 and rerun the test.
+ *
+ *          - The second slowest is what type of IO to perform. We have
+ *            three choices: RAW, MPI-IO, and PHDF5.
+ *
+ *          - Then we change the size of the buffer. This information is
+ *            inferred from the number of datasets to create and the number
+ *            of integers to put into each dataset. The backend code figures
+ *            this out.
+ *
+ * Return: Nothing
+ * Programmer: Bill Wendling, 30.
October 2001
+ * Modifications:
+ */
+
+/*
+ * Prototype for the test engine entry point called by run_test_loop();
+ * the function itself is defined in pio_engine.c.
+ */
+int dopio(parameters param);
+
+static void
+run_test_loop(FILE *output, int max_num_procs, long max_size)
+{
+    parameters parms;
+
+    /* num_files stays ``1'' for now but may change later */
+    parms.num_files = 1;
+    parms.num_iters = 1;
+
+    /* divide the maximum number of processors by 2 for each loop iter */
+    for (; max_num_procs > 0; max_num_procs /= 2) {
+        register iotype i;
+
+        parms.max_num_procs = max_num_procs;
+        fprintf(output, "Number of processors = %u\n", parms.max_num_procs);
+
+        for (i = RAW; i <= PHDF5; ++i) {
+            register unsigned long j;
+
+            parms.io_type = i;
+            print_indent(output, TAB_SPACE * 1);
+            fprintf(output, "Type of IO = ");
+
+            if (i == RAW)
+                fprintf(output, "Raw\n");
+            else if (i == MPIO)
+                fprintf(output, "MPIO\n");
+            else
+                fprintf(output, "PHDF5\n");
+
+            /* sweep the buffer size, doubling it on every pass */
+            for (j = MIN_HDF5_BUF_SIZE; j <= MAX_HDF5_BUF_SIZE; j <<= 1) {
+                parms.num_dsets = ONE_GB / j;
+                parms.num_elmts = (max_size * j) / sizeof(int);
+
+                print_indent(output, TAB_SPACE * 2);
+                fprintf(output,
+                        "# of files: %u, # of dsets: %lu, Elements per dset: %lu\n",
+                        parms.num_files, parms.num_dsets, parms.num_elmts);
+
+                /* call Albert's testing here */
+                if (dopio(parms) != 0)
+                    fprintf(stderr, "%s: I/O test failed\n", progname);
+                /* get back ``result'' object and report */
+            }
+        }
+    }
+}
+
+/*
+ * Function: print_indent
+ * Purpose: Print spaces to indent a new line of text for pretty printing
+ *          things. Only the INDENT spaces are written; no newline
+ *          follows them, so whatever the caller prints next starts at
+ *          column INDENT on the same line.
+ * Return: Nothing
+ * Programmer: Bill Wendling, 29. October 2001
+ * Modifications:
+ */
+static void
+print_indent(register FILE *output, register int indent)
+{
+    for (; indent > 0; --indent)
+        fputc(' ', output);
+}
+
+/*
+ * Function: usage
+ * Purpose: Print a usage message to stdout. The function returns to its
+ *          caller, which decides whether and how to exit.
+ * Return: Nothing
+ * Programmer: Bill Wendling, 31.
October 2001
+ * Modifications:
+ */
+static void
+usage(const char *prog)
+{
+    fflush(stdout);
+    fprintf(stdout, "usage: %s [OPTIONS]\n", prog);
+    fprintf(stdout, " OPTIONS\n");
+    fprintf(stdout, " -h, --help Print a usage message and exit\n");
+    fprintf(stdout, " -m #, --max-size=# Maximum size of file in gigabytes [default: 2]\n");
+    fprintf(stdout, " -o F, --output=F Output raw data into file F\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, " F - is a filename.\n");
+    fprintf(stdout, "\n");
+}
+
+/*
+ * Function:    parse_command_line
+ * Purpose:     Parse the command line options and return a STRUCT OPTIONS
+ *              structure which will need to be freed by the calling function.
+ *              The maximum size defaults to 2 when -m is not given.
+ * Return:      Pointer to a heap-allocated STRUCT OPTIONS. Never NULL: the
+ *              program exits on allocation failure, on -h, and on a bad
+ *              option or option argument.
+ * Programmer:  Bill Wendling, 31. October 2001
+ * Modifications:
+ */
+static struct options *
+parse_command_line(int argc, char *argv[])
+{
+    int opt;
+    struct options *cl_opts;
+
+    cl_opts = (struct options *)calloc(1, sizeof(struct options));
+
+    if (cl_opts == NULL) {
+        fprintf(stderr, "%s: out of memory\n", progname);
+        exit(EXIT_FAILURE);
+    }
+
+    cl_opts->max_size = 2;
+
+    while ((opt = get_option(argc, (const char **)argv, s_opts, l_opts)) != EOF) {
+        switch ((char)opt) {
+#if 0
+        case 'b':
+            /* the future "binary" option */
+            break;
+#endif  /* 0 */
+        case 'm': {
+            char *end;
+            long size = strtol(opt_arg, &end, 10);
+
+            /*
+             * atol() silently turned garbage into 0; insist on a complete,
+             * positive decimal number instead.
+             */
+            if (end == opt_arg || *end != '\0' || size <= 0) {
+                fprintf(stderr, "%s: invalid maximum size: %s\n",
+                        progname, opt_arg);
+                usage(progname);
+                exit(EXIT_FAILURE);
+            }
+
+            cl_opts->max_size = size;
+            break;
+        }
+        case 'o':
+            /* keeps the pointer handed back by get_option(); no copy is made */
+            cl_opts->output_file = opt_arg;
+            break;
+        case 'h':
+            usage(progname);
+            exit(EXIT_SUCCESS);
+        case '?':
+        default:
+            usage(progname);
+            exit(EXIT_FAILURE);
+        }
+    }
+
+    return cl_opts;
+}
diff --git a/perform/pio_perf.h b/perform/pio_perf.h
new file mode 100644
index 0000000..8f21df2
--- /dev/null
+++ b/perform/pio_perf.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2001
+ * National Center for Supercomputing Applications
+ * All rights reserved.
+ *
+ */
+#ifndef PIO_PERF_H__
+#define PIO_PERF_H__
+
+/* The kinds of I/O test that can be selected through parameters.io_type */
+typedef enum iotype_ {
+    RAW,
+    MPIO,
+    PHDF5
+    /*NUM_TYPES*/
+} iotype;
+
+/* Everything one test run needs to know */
+typedef struct parameters_ {
+    unsigned int max_num_procs;   /* Maximum number of processes to use */
+    iotype io_type;               /* The type of IO test to perform */
+    unsigned int num_files;       /* Number of files to create */
+    unsigned long num_dsets;      /* Number of datasets to create */
+    unsigned long num_elmts;      /* Number of native ints in each dset */
+    unsigned int num_iters;       /* Number of times to loop doing the IO */
+} parameters;
+
+#endif  /* PIO_PERF_H__ */
diff --git a/perform/pio_timer.c b/perform/pio_timer.c
new file mode 100644
index 0000000..1220e38
--- /dev/null
+++ b/perform/pio_timer.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2001
+ * National Center for Supercomputing Applications
+ * All rights reserved.
+ */
+
+/*
+ * Purpose:
+ *
+ * This is a module of useful timing functions for performance testing.
+ */
+
+/*
+ * NOTE(review): the three header names below were missing from this copy of
+ * the patch and are reconstructed from the functions this file uses
+ * (calloc/free, gettimeofday) -- confirm against the repository.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#if 0
+
+#include "hdf5.h"
+
+#endif  /* 0 */
+
+#include "pio_timer.h"
+
+/*
+ * The number to divide the tv_usec field with to get a nice decimal to add to
+ * the number of seconds. Despite the name, this is the number of
+ * MICROseconds in one second: tv_usec counts microseconds.
+ */
+#define MILLISECOND     1000000.0
+
+/*
+ * Function:    perf_time_new
+ * Purpose:     Build us a brand, spankin', new performance time object.
+ *              The object is a black box to the user. They just tell us
+ *              what type of timer they want (MPI_TIMER for MPI_Wtime or
+ *              SYS_TIMER for system time). All accumulated times start at
+ *              zero because the object is allocated with calloc.
+ * Return:      Pointer to perf_time object, or NULL if the allocation
+ *              failed. Release it with perf_time_destroy.
+ * Programmer:  Bill Wendling, 01. October 2001
+ * Modifications:
+ */
+perf_time *
+perf_time_new(unsigned int type)
+{
+    perf_time *pt = (perf_time *)calloc(1, sizeof(struct perf_time_));
+
+    /* calloc can fail; hand NULL back rather than dereferencing it */
+    if (pt != NULL)
+        pt->type = type;
+
+    return pt;
+}
+
+/*
+ * Function:    perf_time_destroy
+ * Purpose:     Remove the memory allocated for the perf_time object. Only
+ *              need to call on a pointer allocated with the ``perf_time_new''
+ *              function.
+ * Return:      Nothing
+ * Programmer:  Bill Wendling, 01.
October 2001
+ * Modifications:
+ */
+void
+perf_time_destroy(perf_time *pt)
+{
+    free(pt);
+}
+
+/*
+ * Function:    set_timer_type
+ * Purpose:     Set the type of the timer to either MPI_TIMER or SYS_TIMER.
+ *              This really only needs to be called if you didn't construct a
+ *              timer with the perf_time_new function (shame!). PT must not
+ *              be NULL.
+ * Return:      Nothing
+ * Programmer:  Bill Wendling, 04. October 2001
+ * Modifications:
+ */
+void
+set_timer_type(perf_time *pt, timer_type type)
+{
+    pt->type = type;
+}
+
+/*
+ * Function:    get_timer_type
+ * Purpose:     Get the type of the timer. PT must not be NULL.
+ * Return:      MPI_TIMER or SYS_TIMER.
+ * Programmer:  Bill Wendling, 04. October 2001
+ * Modifications:
+ */
+timer_type
+get_timer_type(perf_time *pt)
+{
+    return pt->type;
+}
+
+/*
+ * Function:    set_time
+ * Purpose:     Set the time in a ``perf_time'' object. T selects which
+ *              timer slot to use. Passing START records the current time for
+ *              that slot; any other START_STOP value stops the slot and adds
+ *              the time elapsed since the matching START to its running
+ *              total. A NULL PT is ignored.
+ * Return:      Pointer to the passed in ``perf_time'' object.
+ * Programmer:  Bill Wendling, 01. October 2001
+ * Modifications:
+ */
+perf_time *
+set_time(perf_time *pt, timer_type t, int start_stop)
+{
+    if (pt) {
+        if (pt->type == MPI_TIMER) {
+            if (start_stop == START) {
+                pt->mpi_timer[t] = MPI_Wtime();
+            } else {
+                pt->total_time[t] += MPI_Wtime() - pt->mpi_timer[t];
+            }
+        } else {
+            if (start_stop == START) {
+                gettimeofday(&pt->sys_timer[t], NULL);
+            } else {
+                struct timeval sys_t;
+
+                gettimeofday(&sys_t, NULL);
+
+                /*
+                 * Accumulate the elapsed time (now minus start) in seconds,
+                 * mirroring the MPI branch above. Adding the start stamp
+                 * itself would add the absolute clock reading instead.
+                 */
+                pt->total_time[t] +=
+                    ((double)sys_t.tv_sec +
+                        ((double)sys_t.tv_usec) / MILLISECOND) -
+                    ((double)pt->sys_timer[t].tv_sec +
+                        ((double)pt->sys_timer[t].tv_usec) / MILLISECOND);
+            }
+        }
+    }
+
+    return pt;
+}
+
+/*
+ * Function:    get_time
+ * Purpose:     Get the time from a ``perf_time'' object.
+ * Return:      The number of seconds as a DOUBLE.
+ * Programmer:  Bill Wendling, 01.
October 2001
+ * Modifications:
+ */
+double
+get_time(perf_time *pt, timer_type t)
+{
+    /* tolerate a NULL object the same way set_time does */
+    if (!pt)
+        return 0.0;
+
+    return pt->total_time[t];
+}
diff --git a/perform/pio_timer.h b/perform/pio_timer.h
new file mode 100644
index 0000000..637b2bb
--- /dev/null
+++ b/perform/pio_timer.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2001
+ * National Center for Supercomputing Applications
+ * All rights reserved.
+ *
+ */
+#ifndef PERF_TIMER__
+#define PERF_TIMER__
+
+/*
+ * NOTE(review): the header names in the #include lines below were missing
+ * from this copy of the patch. The disabled branch follows the usual
+ * Autoconf time-header idiom; the active branch is reconstructed from what
+ * this header and pio_timer.c use (MPI_Wtime, struct timeval) -- confirm
+ * against the repository.
+ */
+#if 0
+
+#if defined(H5_TIME_WITH_SYS_TIME)
+#   include <sys/time.h>
+#   include <time.h>
+#elif defined(H5_HAVE_SYS_TIME_H)
+#   include <sys/time.h>
+#else
+#   include <time.h>
+#endif
+
+#else
+
+#include <mpi.h>
+#include <sys/time.h>
+
+#endif  /* 0 */
+
+/* The different types of timers we can have */
+typedef enum timer_type_ {
+    HDF5_MPI_OVERHEAD,
+    HDF5_FILE_OPENCLOSE,
+    HDF5_GROUP_CREATE,
+    HDF5_DATASET_CREATE,
+    HDF5_WRITE_FIXED_DIMS,
+    HDF5_READ_FIXED_DIMS,
+    NUM_TIMERS              /* count of timers; sizes the arrays below */
+} timer_type;
+
+/* Miscellaneous identifiers */
+enum {
+    MPI_TIMER = 0,  /* Use MPI timer to measure time            */
+    SYS_TIMER = 1,  /* Use system clock to measure time         */
+
+    START,          /* Start a specified timer                  */
+    STOP            /* Stop a specified timer                   */
+};
+
+/* The performance time structure */
+typedef struct perf_time_ {
+    unsigned int type : 1;                  /* MPI_TIMER or SYS_TIMER       */
+    double total_time[NUM_TIMERS];          /* accumulated seconds per timer */
+    double mpi_timer[NUM_TIMERS];           /* MPI_Wtime stamp taken at START */
+    struct timeval sys_timer[NUM_TIMERS];   /* gettimeofday stamp taken at START */
+} perf_time;
+
+/* External function declarations */
+#ifdef __cplusplus
+extern "C" {
+#endif  /* __cplusplus */
+extern perf_time   *perf_time_new(unsigned int);
+extern void         perf_time_destroy(perf_time *pt);
+/*
+ * NOTE(review): the two functions below carry MPI_TIMER/SYS_TIMER values in
+ * a ``timer_type'', although that enum names the individual timers --
+ * confirm whether a separate type was intended.
+ */
+extern void         set_timer_type(perf_time *pt, timer_type type);
+extern timer_type   get_timer_type(perf_time *pt);
+extern perf_time   *set_time(perf_time *pt, timer_type t, int start_stop);
+extern double       get_time(perf_time *pt, timer_type t);
+#ifdef __cplusplus
+}
+#endif  /* __cplusplus */
+
+#endif  /* PERF_TIMER__ */
-- cgit v0.12