#include <unistd.h> #include <stdlib.h> #include <assert.h> #include <math.h> #include "hdf5.h" /* Local macros */ /* defines for type of VFL driver to use */ #define FACC_DEFAULT 0 #define FACC_MPIO 1 #define FACC_MPIPOSIX 2 /* Defines for computing performance information */ #define ONE_KB 1024 #define ONE_MB (ONE_KB * ONE_KB) #define ONE_GB (ONE_MB * ONE_KB) /* report 0.0 in case t is zero too */ #define MB_PER_SEC(bytes,t) ((fabs(t)<0.0000000001) ? 0.0 : ((((double)bytes) / ONE_MB) / (t))) /* Control default behavior (with no command line arguments) */ #define DEFAULT_RANK 3 #define DEFAULT_DIM 1024 #define DEFAULT_PREFIX "/tmp" #define DEFAULT_USERNAME "koziol" #define DEFAULT_FILENAME "benchpar.h5" #define DEFAULT_SLICE 0 #define DEFAULT_C_TYPE int #define DEFAULT_HDF5_DATATYPE H5T_NATIVE_INT /* Keep this in sync with the DEFAULT_C_TYPE */ #define DEFAULT_DATASET_NAME "Dataset" #define DEFAULT_VFL_DRIVER FACC_MPIO #define DEFAULT_PAR_MODE H5FD_MPIO_COLLECTIVE #define DEFAULT_CHUNK_STORAGE 0 #define DEFAULT_ITER 3 /* MPI info */ int mpi_rank, mpi_size; int mpi_namelen; char mpi_name[MPI_MAX_PROCESSOR_NAME]; /* Usage information */ static void usage(void) { printf("usage: benchpar [-d <# of dims>] [-s <dim_size>] [-f <file name>] [-h]\n"); printf(" [-S <slice dim>] [-p] [-I] [-c] [-i <# of iterations>\n"); printf(" -c - Use chunked storage for dataset with 1-1 exact\n"); printf(" mapping of chunks to hyperslabs\n"); printf(" Default: off (i.e. contiguous storage)\n"); printf(" -d <# of dims> - Number of dimensions of the dataset\n"); printf(" Default: 3\n"); printf(" -f <file name> - Set the name of the test file\n"); printf(" Default: /tmp/<login>/benchpar.h5\n"); printf(" -h - Prints usage information\n"); printf(" -i <# of iters> - Set the number of test iterations to perform\n"); printf(" Default: 3\n"); printf(" -I - Use independent parallel I/O\n"); printf(" Default: use collective parallel I/O\n"); printf(" -p - Use MPI-posix VFL driver\n"); printf(" Default: use MPI-I/O VFL driver\n"); printf(" -s <dim_size> - Set the size of each of the dataset's dimensions\n"); printf(" Default: 1024\n"); printf(" -S <slice dim> - Set the dimension to slice the dataset along\n"); printf(" Default: 0\n"); } /* end usage() */ /* Create & initialize file creation property list with appropriate properties */ static hid_t create_fcpl(void) { hid_t fcpl; /* File creation property list */ fcpl=H5Pcreate(H5P_FILE_CREATE); assert(fcpl>0); return(fcpl); } /* end create_fcpl() */ /* Create & initialize file access property list with appropriate properties */ static hid_t create_fapl(MPI_Comm comm, MPI_Info info, int acc_type ) { hid_t fapl; /* File access property list */ hbool_t use_gpfs = FALSE; /* Use GPFS hints */ herr_t ret; /* Generic return value */ fapl = H5Pcreate (H5P_FILE_ACCESS); assert(fapl>0); /* set parallel access with communicator, using MPI-I/O driver */ if (acc_type == FACC_MPIO) { ret = H5Pset_fapl_mpio(fapl, comm, info); assert(ret>=0); } /* end if */ /* set parallel access with communicator, using MPI-posix driver */ if (acc_type == FACC_MPIPOSIX) { ret = H5Pset_fapl_mpiposix(fapl, comm, use_gpfs); assert(ret>=0); } /* end if */ return (fapl); } /* end create_fapl() */ /* Create & initialize dataset creation property list with appropriate properties */ static hid_t create_dcpl(unsigned use_chunks, int rank, hsize_t *dims) { hid_t dcpl; /* Dataset creation property list */ herr_t ret; /* Generic return value */ dcpl=H5Pcreate(H5P_DATASET_CREATE); assert(dcpl>0); /* Check if the dataset should be chunked */ if(use_chunks) { ret = H5Pset_chunk(dcpl, rank, dims); assert(ret>=0); } /* end if */ return(dcpl); } /* end create_dcpl() */ /* Create & initialize dataset transfer property list with appropriate properties */ static hid_t create_dxpl(H5FD_mpio_xfer_t par_mode) { hid_t dxpl; /* Dataset creation property list */ herr_t ret; /* Generic return value */ dxpl=H5Pcreate(H5P_DATASET_XFER); assert(dxpl>0); /* Set collective I/O on this transfer */ ret=H5Pset_dxpl_mpio(dxpl, par_mode); assert(ret>=0); return(dxpl); } /* end create_dcpl() */ int main(int argc, char *argv[]) { int curr_arg; /* Current command line argument being processed */ int rank; /* Number of dimensions of the dataset */ hsize_t dim_size; /* Dimension size of each dimension */ hsize_t *dims; /* Pointer to array of dimensions */ hssize_t *start; /* Pointer to array of starting locations for hyperslab selection */ hsize_t *count; /* Pointer to array of counts for hyperslab selection */ unsigned slice_dim; /* Dimension to slice up */ char *file_name=NULL; /* Name of file to put data into */ hid_t fcpl; /* HDF5 File creation property list ID */ hid_t fapl; /* HDF5 File access property list ID */ hid_t dcpl; /* HDF5 Dataset creation property list ID */ hid_t dxpl; /* HDF5 Dataset transfer property list ID */ hid_t fid; /* HDF5 file ID */ hid_t dsid; /* HDF5 dataset ID */ hid_t file_sid; /* HDF5 dataspace ID for dataset on disk */ hid_t mem_sid; /* HDF5 dataspace ID for dataset in memory */ DEFAULT_C_TYPE *buf; /* Buffer to write out */ hsize_t buf_size; /* Size of buffer to write */ int i; /* Local index variable */ herr_t ret; /* Generic return value */ double start_write_time, end_write_time, elap_write_time; /* Start, end and elapsed time to write raw data */ double tmp_max_write_time; /* Temporary holders for maximum time for all nodes to perform raw data I/O */ double max_write_time=-DBL_MAX, min_write_time=DBL_MAX; /* Minimum & maximum time for all nodes to perform raw data I/O */ double start_file_time, end_file_time, elap_file_time; /* Start, end and elapsed time from file open to file close */ double tmp_max_file_time; /* Temporary holders for maximum time for all nodes from file open to file close */ double max_file_time=-DBL_MAX, min_file_time=DBL_MAX; /* Minimum & maximum time for all nodes from file open to file close */ int vfl_type; /* Type of VFL driver to use */ H5FD_mpio_xfer_t par_mode; /* Type of parallel I/O to perform */ unsigned use_chunks; /* Whether to use chunks for dataset or not */ unsigned num_iter; /* Number of iterations to perform */ unsigned u; /* Local index variable */ /* Un-buffer the stdout and stderr */ setbuf(stderr, NULL); setbuf(stdout, NULL); /* MPI initialization */ MPI_Init(&argc,&argv); MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); MPI_Get_processor_name(mpi_name,&mpi_namelen); /* Set some defaults */ rank=DEFAULT_RANK; dim_size=DEFAULT_DIM; slice_dim=DEFAULT_SLICE; vfl_type=DEFAULT_VFL_DRIVER; par_mode=DEFAULT_PAR_MODE; use_chunks=DEFAULT_CHUNK_STORAGE; num_iter=DEFAULT_ITER; /* Parse command line arguments */ if(argc>1) { curr_arg=1; while(curr_arg<argc) { /* Trap any unknown command line parameters */ if(argv[curr_arg][0]!='-') { printf("unknown command line parameter: %s\n",argv[curr_arg]); goto done; } /* end if */ /* Skip over command line flag */ curr_arg++; switch(argv[curr_arg-1][1]) { case 'c': /* Use chunked storage for dataset */ use_chunks=1; break; case 'd': /* Change number of dimensions */ /* Get new number of dimensions */ rank=atoi(argv[curr_arg]); curr_arg++; /* Skip over number of dimensions */ /* Sanity check */ if(rank<=0) { printf("rank=%d, invalid number of dimensions: %d\n",mpi_rank,rank); goto done; } /* end if */ break; case 'f': /* Change test file name */ /* Get new file name */ file_name=strdup(argv[curr_arg]); curr_arg++; /* Skip over file name from command line */ break; case 'h': /* Print usage information */ usage(); goto done; break; case 'i': /* Change number of iterations */ /* Get new number of dimensions */ num_iter=atoi(argv[curr_arg]); curr_arg++; /* Skip over number of iterations */ /* Sanity check */ if(num_iter<1) { printf("rank=%d, invalid number of iterations: %u\n",mpi_rank,num_iter); goto done; } /* end if */ break; case 'I': /* Use independent I/O for parallel I/O */ par_mode=H5FD_MPIO_INDEPENDENT; break; case 'p': /* Use MPI-posix VFL driver */ vfl_type=FACC_MPIPOSIX; break; case 's': /* Change dimension size */ /* Get new dimension size */ dim_size=atoi(argv[curr_arg]); curr_arg++; /* Skip over dimension size from command line */ /* Sanity check */ if(dim_size<=0) { printf("rank=%d, invalid dimension size: %ld\n",mpi_rank,(long)dim_size); goto done; } /* end if */ break; case 'S': /* Change dimension to slice */ /* Get new dimension to slice */ slice_dim=atoi(argv[curr_arg]); curr_arg++; /* Skip over slice dimension from command line */ break; default: printf("rank=%d, unknown command line parameter: %s\n",mpi_rank,argv[curr_arg-1]); goto done; } /* end switch */ } /* end while */ } /* end if */ /* Sanity check */ if(slice_dim>=rank) { printf("rank=%d, error, slice dim larger than rank: slice_dim=%d, rank=%d\n",mpi_rank,slice_dim,rank); goto done; } /* end if */ /* Set rest of defaults */ if(file_name==NULL) { char *login; /* Pointer to login name */ /* Get the login name for this user */ login=getlogin(); if(login==NULL) login=DEFAULT_USERNAME; /* Allocate enough room for the prefix, the login name, two '/'s, the filename and the string terminator */ file_name=malloc(strlen(DEFAULT_PREFIX)+1+strlen(login)+1+strlen(DEFAULT_FILENAME)+1); strcpy(file_name,DEFAULT_PREFIX); strcat(file_name,"/"); strcat(file_name,login); strcat(file_name,"/"); strcat(file_name,DEFAULT_FILENAME); } /* end if */ /* Allocate memory for this process's portion of dataset */ buf_size=sizeof(DEFAULT_C_TYPE); for(i=0; i<rank; i++) buf_size *= dim_size; buf_size /= mpi_size; /* Sanity check for overflow */ assert((hsize_t)((size_t)buf_size)==buf_size); buf=malloc((size_t)buf_size); assert(buf); /* Initialize dataset portion to something unique for each process */ memset(buf,mpi_rank,(size_t)buf_size); for(u=0; u<num_iter; u++) { /* Create file creation property list */ fcpl=create_fcpl(); assert(fcpl>0); /* Create file access property list */ fapl=create_fapl(MPI_COMM_WORLD,MPI_INFO_NULL,vfl_type); assert(fapl>0); /* Get file start time */ start_file_time = MPI_Wtime(); /* Create file */ fid=H5Fcreate(file_name,H5F_ACC_TRUNC,fcpl,fapl); assert(fid>0); /* Close file creation property list */ ret=H5Pclose(fcpl); assert(ret>=0); /* Close file access property list */ ret=H5Pclose(fapl); assert(ret>=0); /* Create dataspace for dataset on disk */ dims=malloc(sizeof(hsize_t)*rank); assert(dims); for(i=0; i<rank; i++) dims[i]=dim_size; file_sid=H5Screate_simple(rank,dims,NULL); assert(file_sid>0); /* Create dataspace for buffer in memory */ for(i=0; i<rank; i++) dims[i]=dim_size; dims[slice_dim] /= mpi_size; mem_sid=H5Screate_simple(rank,dims,NULL); assert(mem_sid>0); /* Create dataset creation property list */ dcpl=create_dcpl(use_chunks,rank,dims); assert(dcpl>0); /* Create dataset */ dsid=H5Dcreate(fid,DEFAULT_DATASET_NAME,DEFAULT_HDF5_DATATYPE,file_sid,dcpl); assert(dsid>0); /* Close dataset creation property list */ ret=H5Pclose(dcpl); assert(ret>=0); /* Select hyperslab for file dataspace */ start=malloc(sizeof(hssize_t)*rank); assert(start); count=malloc(sizeof(hsize_t)*rank); assert(count); for(i=0; i<rank; i++) { start[i]=0; count[i]=dim_size; } /* end for */ start[slice_dim]=mpi_rank*(dim_size/mpi_size); count[slice_dim]=dim_size/mpi_size; ret = H5Sselect_hyperslab(file_sid,H5S_SELECT_SET,start,NULL,count,NULL); assert(ret>=0); /* Create dataset transfer property list */ dxpl=create_dxpl(par_mode); assert(dxpl>0); /* Get raw data start time */ start_write_time = MPI_Wtime(); /* Write hyperslab to dataset */ ret = H5Dwrite(dsid, DEFAULT_HDF5_DATATYPE, mem_sid, file_sid, dxpl, buf); assert(ret>=0); /* Get stop time for raw data timer */ end_write_time = MPI_Wtime(); /* Close dataset transfer property list */ ret=H5Pclose(dxpl); assert(ret>=0); /* Close memory dataspace */ ret=H5Sclose(mem_sid); assert(ret>=0); /* Close file dataspace */ ret=H5Sclose(file_sid); assert(ret>=0); /* Close dataset */ ret=H5Dclose(dsid); assert(ret>=0); /* Close file */ ret=H5Fclose(fid); assert(ret>=0); /* Get stop time for file timer */ end_file_time = MPI_Wtime(); /* Compute timing results */ elap_write_time=end_write_time-start_write_time; elap_file_time=end_file_time-start_file_time; /* Collect the minimum and maximum times by MPI reduces */ MPI_Allreduce(&elap_write_time, &tmp_max_write_time, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&elap_file_time, &tmp_max_file_time, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); /* Track the fastest & slowest total runs */ if(tmp_max_write_time>max_write_time) max_write_time=tmp_max_write_time; if(tmp_max_write_time<min_write_time) min_write_time=tmp_max_write_time; if(tmp_max_file_time>max_file_time) max_file_time=tmp_max_file_time; if(tmp_max_file_time<min_file_time) min_file_time=tmp_max_file_time; } /* end for */ /* Only print information from one node */ if(mpi_rank==0) { /* Print information about test */ printf("File driver used: %s\n",vfl_type==FACC_MPIO ? "MPI-I/O" : "MPI-posix"); printf("Type of parallel access: %s\n",par_mode==H5FD_MPIO_COLLECTIVE ? "Collective" : "Independent"); printf("Type of dataset storage: %s\n",use_chunks ? "Chunked" : "Contiguous"); printf("Number of processes: %d\n",mpi_size); printf("Element size: %u\n",(unsigned)sizeof(DEFAULT_C_TYPE)); printf("# of dimensions: %d\n",rank); printf("Dimension size: %ld\n",(long)dim_size); printf("Dimension sliced: %u\n",slice_dim); printf("Number of elements: %lu\n",(unsigned long)((buf_size/sizeof(DEFAULT_C_TYPE))*mpi_size)); printf("Total dataset size (bytes): %lu\n",(unsigned long)(buf_size*mpi_size)); printf("Dataset size per process (bytes): %lu\n",(unsigned long)buf_size); /* Print timing results */ printf("# of iterations: %u\n",num_iter); printf("Maximum raw data write throughput=%6.2f MB/s (%7.3f s)\n",MB_PER_SEC((buf_size*mpi_size),min_write_time),min_write_time); printf("Minimum raw data write throughput=%6.2f MB/s (%7.3f s)\n",MB_PER_SEC((buf_size*mpi_size),max_write_time),max_write_time); printf("Maximum file throughput=%6.2f MB/s (%7.3f s)\n",MB_PER_SEC((buf_size*mpi_size),min_file_time),min_file_time); printf("Minimum file throughput=%6.2f MB/s (%7.3f s)\n",MB_PER_SEC((buf_size*mpi_size),max_file_time),max_file_time); } /* end if */ done: /* Free buffers allocated */ if(file_name) free(file_name); if(buf) free(buf); if(dims) free(dims); if(start) free(start); if(count) free(count); /* MPI termination */ MPI_Finalize(); return(0); } /* end main() */