summaryrefslogtreecommitdiffstats
path: root/perform/mpi-perf.c
diff options
context:
space:
mode:
Diffstat (limited to 'perform/mpi-perf.c')
-rw-r--r--perform/mpi-perf.c357
1 files changed, 357 insertions, 0 deletions
diff --git a/perform/mpi-perf.c b/perform/mpi-perf.c
new file mode 100644
index 0000000..09a134b
--- /dev/null
+++ b/perform/mpi-perf.c
@@ -0,0 +1,357 @@
+/*
+ * (C) 1995-2001 Clemson University and Argonne National Laboratory.
+ *
+ * See COPYING in top-level directory.
+ *
+ * This is contributed by Robert Ross to the HDF5 software.
+ * and was called mpi-io-test.c
+ */
+
+#ifdef H5_HAVE_PARALLEL
+/* mpi-perf.c
+ *
+ * This is derived from code given to me by Rajeev Thakur. Dunno where
+ * it originated.
+ *
+ * It's purpose is to produce aggregate bandwidth numbers for varying
+ * block sizes, number of processors, an number of iterations.
+ *
+ * This is strictly an mpi program - it is used to test the MPI I/O
+ * functionality implemented by Romio.
+ *
+ * Compiling is usually easiest with something like:
+ * mpicc -Wall -Wstrict-prototypes mpi-io-test.c -o mpi-io-test
+ *
+ * NOTE: This code assumes that all command line arguments make it out to all
+ * the processes that make up the parallel job, which isn't always the case.
+ * So if it doesn't work on some platform, that might be why.
+ */
+/* Modifications:
+ * Albert Cheng, Apr 30, 20001
+ * Changed MPI_File_open to use MPI_COMM_WORLD (was MPI_COMM_SELF).
+ * Albert Cheng, May 5, 20001
+ * Changed MPI_File_seek then MPI_File_write or MPI_File_read to just
+ * MPI_File_write_at and MPI_File_read_at. Some compiler, e.g., IBM
+ * mpcc_r does not support MPI_File_seek and MPI_File_read or MPI_File_write.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/time.h>
+#include <mpi.h>
+#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */
+# include <mpio.h>
+#endif
+
+
+
+/* DEFAULT VALUES FOR OPTIONS */
+int64_t opt_block = 1048576*16;
+int opt_iter = 1;
+int opt_stripe = -1;
+int opt_correct = 0;
+int amode = O_RDWR | O_CREAT;
+char opt_file[256] = "/foo/test.out\0";
+char opt_pvfstab[256] = "notset\0";
+int opt_pvfstab_set = 0;
+
+/* function prototypes */
+int parse_args(int argc, char **argv);
+double Wtime(void);
+
+extern int errno;
+extern int debug_on;
+
+/* globals needed for getopt */
+extern char *optarg;
+extern int optind, opterr;
+
+int main(int argc, char **argv)
+{
+ char *buf, *tmp, *buf2, *tmp2, *check;
+ int i, j, mynod=0, nprocs=1, err, my_correct = 1, correct, myerrno;
+ double stim, etim;
+ double write_tim = 0;
+ double read_tim = 0;
+ double read_bw, write_bw;
+ double max_read_tim, max_write_tim;
+ double min_read_tim, min_write_tim;
+ double ave_read_tim, ave_write_tim;
+ int64_t iter_jump = 0;
+ int64_t seek_position = 0;
+ MPI_File fh;
+ MPI_Status status;
+ int nchars;
+
+ /* startup MPI and determine the rank of this process */
+ MPI_Init(&argc,&argv);
+ MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+ MPI_Comm_rank(MPI_COMM_WORLD, &mynod);
+
+ /* parse the command line arguments */
+ parse_args(argc, argv);
+
+ if (mynod == 0) printf("# Using mpi-io calls.\n");
+
+
+ /* kindof a weird hack- if the location of the pvfstab file was
+ * specified on the command line, then spit out this location into
+ * the appropriate environment variable: */
+
+#if H5_HAVE_SETENV
+/* no setenv or unsetenv */
+ if (opt_pvfstab_set) {
+ if((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0){
+ perror("setenv");
+ goto die_jar_jar_die;
+ }
+ }
+#endif
+
+ /* this is how much of the file data is covered on each iteration of
+ * the test. used to help determine the seek offset on each
+ * iteration */
+ iter_jump = nprocs * opt_block;
+
+ /* setup a buffer of data to write */
+ if (!(tmp = (char *) malloc(opt_block + 256))) {
+ perror("malloc");
+ goto die_jar_jar_die;
+ }
+ buf = tmp + 128 - (((long)tmp) % 128); /* align buffer */
+
+ if (opt_correct) {
+ /* do the same buffer setup for verifiable data */
+ if (!(tmp2 = (char *) malloc(opt_block + 256))) {
+ perror("malloc2");
+ goto die_jar_jar_die;
+ }
+ buf2 = tmp + 128 - (((long)tmp) % 128);
+ }
+
+ /* open the file for writing */
+ err = MPI_File_open(MPI_COMM_WORLD, opt_file,
+ MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
+ if (err < 0) {
+ fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
+ goto die_jar_jar_die;
+ }
+
+ /* now repeat the write operations the number of times
+ * specified on the command line */
+ for (j=0; j < opt_iter; j++) {
+
+ /* calculate the appropriate position depending on the iteration
+ * and rank of the current process */
+ seek_position = (j*iter_jump)+(mynod*opt_block);
+
+ if (opt_correct) /* fill in buffer for iteration */ {
+ for (i=mynod+j, check=buf; i<opt_block; i++,check++) *check=(char)i;
+ }
+
+ /* discover the starting time of the operation */
+ MPI_Barrier(MPI_COMM_WORLD);
+ stim = MPI_Wtime();
+
+ /* write out the data */
+ nchars = opt_block/sizeof(char);
+ err = MPI_File_write_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
+ if(err){
+ fprintf(stderr, "node %d, write error: %s\n", mynod,
+ strerror(errno));
+ }
+
+ /* discover the ending time of the operation */
+ etim = MPI_Wtime();
+
+ write_tim += (etim - stim);
+
+ /* we are done with this "write" iteration */
+ }
+
+ err = MPI_File_close(&fh);
+ if(err){
+ fprintf(stderr, "node %d, close error after write\n", mynod);
+ }
+
+ /* wait for everyone to synchronize at this point */
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ /* reopen the file to read the data back out */
+ err = MPI_File_open(MPI_COMM_WORLD, opt_file,
+ MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
+ if (err < 0) {
+ fprintf(stderr, "node %d, open error: %s\n", mynod, strerror(errno));
+ goto die_jar_jar_die;
+ }
+
+
+ /* we are going to repeat the read operation the number of iterations
+ * specified */
+ for (j=0; j < opt_iter; j++) {
+ /* calculate the appropriate spot give the current iteration and
+ * rank within the MPI processes */
+ seek_position = (j*iter_jump)+(mynod*opt_block);
+
+ /* discover the start time */
+ MPI_Barrier(MPI_COMM_WORLD);
+ stim = MPI_Wtime();
+
+ /* read in the file data */
+ if (!opt_correct){
+ err = MPI_File_read_at(fh, seek_position, buf, nchars, MPI_CHAR, &status);
+ }
+ else{
+ err = MPI_File_read_at(fh, seek_position, buf2, nchars, MPI_CHAR, &status);
+ }
+ myerrno = errno;
+
+ /* discover the end time */
+ etim = MPI_Wtime();
+ read_tim += (etim - stim);
+
+ if (err < 0) fprintf(stderr, "node %d, read error, loc = %Ld: %s\n",
+ mynod, mynod*opt_block, strerror(myerrno));
+
+ /* if the user wanted to check correctness, compare the write
+ * buffer to the read buffer */
+ if (opt_correct && memcmp(buf, buf2, opt_block)) {
+ fprintf(stderr, "node %d, correctness test failed\n", mynod);
+ my_correct = 0;
+ MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN,
+ MPI_COMM_WORLD);
+ }
+
+ /* we are done with this read iteration */
+ }
+
+ /* close the file */
+ err = MPI_File_close(&fh);
+ if(err){
+ fprintf(stderr, "node %d, close error after write\n", mynod);
+ }
+
+ /* compute the read and write times */
+ MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX,
+ MPI_COMM_WORLD);
+ MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN,
+ MPI_COMM_WORLD);
+ MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM,
+ MPI_COMM_WORLD);
+
+ /* calculate the average from the sum */
+ ave_read_tim = ave_read_tim / nprocs;
+
+ MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX,
+ MPI_COMM_WORLD);
+ MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN,
+ MPI_COMM_WORLD);
+ MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM,
+ MPI_COMM_WORLD);
+
+ /* calculate the average from the sum */
+ ave_write_tim = ave_write_tim / nprocs;
+
+ /* print out the results on one node */
+ if (mynod == 0) {
+ read_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_read_tim*1000000.0);
+ write_bw = ((int64_t)(opt_block*nprocs*opt_iter))/(max_write_tim*1000000.0);
+
+ printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs,
+ opt_iter, (long)opt_block);
+
+ printf("# total_size = %ld\n", (long)(opt_block*nprocs*opt_iter));
+
+ printf("# Write: min_time = %f, max_time = %f, mean_time = %f\n",
+ min_write_tim, max_write_tim, ave_write_tim);
+ printf("# Read: min_time = %f, max_time = %f, mean_time = %f\n",
+ min_read_tim, max_read_tim, ave_read_tim);
+
+ printf("Write bandwidth = %f Mbytes/sec\n", write_bw);
+ printf("Read bandwidth = %f Mbytes/sec\n", read_bw);
+
+ if (opt_correct) {
+ printf("Correctness test %s.\n", correct ? "passed" : "failed");
+ }
+ }
+
+
+die_jar_jar_die:
+
+#if H5_HAVE_SETENV
+/* no setenv or unsetenv */
+ /* clear the environment variable if it was set earlier */
+ if (opt_pvfstab_set){
+ unsetenv("PVFSTAB_FILE");
+ }
+#endif
+
+ free(tmp);
+ if (opt_correct) free(tmp2);
+ MPI_Finalize();
+ return(0);
+}
+
+int parse_args(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "s:b:i:f:p:c")) != EOF) {
+ switch (c) {
+ case 's': /* stripe */
+ opt_stripe = atoi(optarg);
+ break;
+ case 'b': /* block size */
+ opt_block = atoi(optarg);
+ break;
+ case 'i': /* iterations */
+ opt_iter = atoi(optarg);
+ break;
+ case 'f': /* filename */
+ strncpy(opt_file, optarg, 255);
+ break;
+ case 'p': /* pvfstab file */
+ strncpy(opt_pvfstab, optarg, 255);
+ opt_pvfstab_set = 1;
+ break;
+ case 'c': /* correctness */
+ opt_correct = 1;
+ break;
+ case '?': /* unknown */
+ default:
+ break;
+ }
+ }
+ return(0);
+}
+
+/* Wtime() - returns current time in sec., in a double */
+double Wtime()
+{
+ struct timeval t;
+
+ gettimeofday(&t, NULL);
+ return((double)t.tv_sec + (double)t.tv_usec / 1000000);
+}
+
+/*
+ * Local variables:
+ * c-indent-level: 3
+ * c-basic-offset: 3
+ * tab-width: 3
+ * End:
+ */
+
+#else /* H5_HAVE_PARALLEL */
+/* dummy program since H5_HAVE_PARALLE is not configured in */
+int
+main()
+{
+printf("No parallel performance because parallel is not configured in\n");
+return(0);
+}
+#endif /* H5_HAVE_PARALLEL */