diff options
Diffstat (limited to 'tools/src/h5perf')
-rw-r--r-- | tools/src/h5perf/CMakeLists.txt | 103 | ||||
-rw-r--r-- | tools/src/h5perf/Makefile.am | 63 | ||||
-rw-r--r-- | tools/src/h5perf/perf.c | 796 | ||||
-rw-r--r-- | tools/src/h5perf/pio_engine.c | 2745 | ||||
-rw-r--r-- | tools/src/h5perf/pio_perf.c | 1834 | ||||
-rw-r--r-- | tools/src/h5perf/pio_perf.h | 109 | ||||
-rw-r--r-- | tools/src/h5perf/sio_engine.c | 1328 | ||||
-rw-r--r-- | tools/src/h5perf/sio_perf.c | 1437 | ||||
-rw-r--r-- | tools/src/h5perf/sio_perf.h | 104 |
9 files changed, 8519 insertions, 0 deletions
diff --git a/tools/src/h5perf/CMakeLists.txt b/tools/src/h5perf/CMakeLists.txt new file mode 100644 index 0000000..644a5ad --- /dev/null +++ b/tools/src/h5perf/CMakeLists.txt @@ -0,0 +1,103 @@ +cmake_minimum_required (VERSION 3.12) +project (HDF5_TOOLS_SRC_H5PERF C) + +# -------------------------------------------------------------------- +# Add the executables +# -------------------------------------------------------------------- +#-- Adding test for h5perf_serial +set (h5perf_serial_SOURCES + ${HDF5_TOOLS_SRC_H5PERF_SOURCE_DIR}/sio_perf.c + ${HDF5_TOOLS_SRC_H5PERF_SOURCE_DIR}/sio_engine.c +) +add_executable (h5perf_serial ${h5perf_serial_SOURCES}) +target_include_directories (h5perf_serial PRIVATE "${HDF5_TEST_SRC_DIR};${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") +if (NOT ONLY_SHARED_LIBS) + TARGET_C_PROPERTIES (h5perf_serial STATIC) + target_link_libraries (h5perf_serial PRIVATE ${HDF5_TOOLS_LIB_TARGET} ${HDF5_LIB_TARGET}) +else () + TARGET_C_PROPERTIES (h5perf_serial SHARED) + target_link_libraries (h5perf_serial PRIVATE ${HDF5_TOOLS_LIBSH_TARGET} ${HDF5_LIBSH_TARGET}) +endif () +set_target_properties (h5perf_serial PROPERTIES FOLDER perform) +set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5perf_serial") + +set (H5_DEP_EXECUTABLES h5perf_serial) + +#----------------------------------------------------------------------------- +# Add Target to clang-format +#----------------------------------------------------------------------------- +if (HDF5_ENABLE_FORMATTERS) + clang_format (HDF5_TOOLS_SRC_H5PERF_h5perf_serial_FORMAT h5perf_serial) +endif () + +if (H5_HAVE_PARALLEL) + if (UNIX) + #-- Adding test for perf - only on unix systems + set (perf_SOURCES + ${HDF5_TOOLS_SRC_H5PERF_SOURCE_DIR}/perf.c + ) + add_executable (perf ${perf_SOURCES}) + target_include_directories (perf PRIVATE "${HDF5_TEST_SRC_DIR};${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") + if (NOT ONLY_SHARED_LIBS) + TARGET_C_PROPERTIES (perf STATIC) + target_link_libraries (perf PRIVATE ${HDF5_TOOLS_LIB_TARGET} ${HDF5_LIB_TARGET}) + else () + TARGET_C_PROPERTIES (perf SHARED) + target_link_libraries (perf PRIVATE ${HDF5_TOOLS_LIBSH_TARGET} ${HDF5_LIBSH_TARGET}) + endif () + set_target_properties (perf PROPERTIES FOLDER perform) + set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};perf") + + set (H5_DEP_EXECUTABLES perf) + + #----------------------------------------------------------------------------- + # Add Target to clang-format + #----------------------------------------------------------------------------- + if (HDF5_ENABLE_FORMATTERS) + clang_format (HDF5_TOOLS_SRC_H5PERF_perf_FORMAT perf) + endif () + endif () + + #-- Adding test for h5perf + set (h5perf_SOURCES + ${HDF5_TOOLS_SRC_H5PERF_SOURCE_DIR}/pio_perf.c + ${HDF5_TOOLS_SRC_H5PERF_SOURCE_DIR}/pio_engine.c + ) + add_executable (h5perf ${h5perf_SOURCES}) + target_include_directories (h5perf PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") + if (NOT ONLY_SHARED_LIBS) + TARGET_C_PROPERTIES (h5perf STATIC) + target_link_libraries (h5perf PRIVATE ${LINK_LIBS} ${HDF5_TOOLS_LIB_TARGET} ${HDF5_LIB_TARGET}) + else () + TARGET_C_PROPERTIES (h5perf SHARED) + target_link_libraries (h5perf PRIVATE ${LINK_LIBS} ${HDF5_TOOLS_LIBSH_TARGET} ${HDF5_LIBSH_TARGET}) + endif () + set_target_properties (h5perf PROPERTIES FOLDER perform) + set_global_variable (HDF5_UTILS_TO_EXPORT "${HDF5_UTILS_TO_EXPORT};h5perf") + + set (H5_DEP_EXECUTABLES h5perf) + + #----------------------------------------------------------------------------- + # Add Target to clang-format + #----------------------------------------------------------------------------- + if (HDF5_ENABLE_FORMATTERS) + clang_format (HDF5_TOOLS_SRC_H5PERF_h5perf_FORMAT h5perf) + endif () +endif () + +#----------------------------------------------------------------------------- +# Rules for Installation of tools using make Install target +#----------------------------------------------------------------------------- +if (HDF5_EXPORTED_TARGETS) + foreach (exec ${H5_DEP_EXECUTABLES}) + INSTALL_PROGRAM_PDB (${exec} ${HDF5_INSTALL_BIN_DIR} toolsapplications) + endforeach () + + install ( + TARGETS + ${H5_DEP_EXECUTABLES} + EXPORT + ${HDF5_EXPORTED_TARGETS} + RUNTIME DESTINATION ${HDF5_INSTALL_BIN_DIR} COMPONENT toolsapplications + ) +endif () diff --git a/tools/src/h5perf/Makefile.am b/tools/src/h5perf/Makefile.am new file mode 100644 index 0000000..e8a9fdd --- /dev/null +++ b/tools/src/h5perf/Makefile.am @@ -0,0 +1,63 @@ +# +# Copyright by The HDF Group. +# Copyright by the Board of Trustees of the University of Illinois. +# All rights reserved. +# +# This file is part of HDF5. The full HDF5 copyright notice, including +# terms governing use, modification, and redistribution, is contained in +# the COPYING file, which can be found at the root of the source code +# distribution tree, or in https://www.hdfgroup.org/licenses. +# If you do not have access to either file, you may request a copy from +# help@hdfgroup.org. +## +## Makefile.am +## Run automake to generate a Makefile.in from this file. +## +# +# HDF5 Library Performance Makefile(.in) +# + +include $(top_srcdir)/config/commence.am + +AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/test -I$(top_srcdir)/tools/lib + +# bin_PROGRAMS will be installed. +if BUILD_PARALLEL_CONDITIONAL + bin_PROGRAMS=h5perf_serial perf h5perf +else + bin_PROGRAMS=h5perf_serial +endif + +# Add h5perf and h5perf_serial specific linker flags here +h5perf_LDFLAGS = $(LT_STATIC_EXEC) $(AM_LDFLAGS) +h5perf_serial_LDFLAGS = $(LT_STATIC_EXEC) $(AM_LDFLAGS) + +# Some programs are not built or run by default, but can be built by hand or by +# specifying --enable-build-all at configure time. +# Also, some of these programs should only be built in parallel. +# Currently there is no such program. +if BUILD_PARALLEL_CONDITIONAL + PARA_BUILD_ALL= +endif +if BUILD_ALL_CONDITIONAL + BUILD_ALL_PROGS=$(PARA_BUILD_ALL) +endif + +# Define programs that will be run in 'make check' +# List them in the order they should be run. +# Parallel test programs. +if BUILD_PARALLEL_CONDITIONAL + TEST_PROG_PARA=h5perf perf +endif + +h5perf_SOURCES=pio_perf.c pio_engine.c +h5perf_serial_SOURCES=sio_perf.c sio_engine.c + +# All of the programs depend on the main hdf5 library, and some of them +# depend on test or tools library. +LDADD=$(LIBHDF5) +h5perf_LDADD=$(LIBH5TOOLS) $(LIBHDF5) +h5perf_serial_LDADD=$(LIBH5TOOLS) $(LIBHDF5) +perf_LDADD=$(LIBHDF5) + +include $(top_srcdir)/config/conclude.am diff --git a/tools/src/h5perf/perf.c b/tools/src/h5perf/perf.c new file mode 100644 index 0000000..83d4ab0 --- /dev/null +++ b/tools/src/h5perf/perf.c @@ -0,0 +1,796 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Author: Albert Cheng of NCSA, May 1, 2001. + * This is derived from code given to me by Robert Ross. + * + * NOTE: This code assumes that all command line arguments make it out to all + * the processes that make up the parallel job, which isn't always the case. + * So if it doesn't work on some platform, that might be why. + */ + +#include "hdf5.h" +#include "H5private.h" + +#ifdef H5_HAVE_PARALLEL + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#ifdef H5_HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +#ifdef H5_HAVE_SYS_TIME_H +#include <sys/time.h> +#endif + +#ifdef H5_HAVE_SYS_TYPES_H +#include <sys/types.h> +#endif + +#ifdef H5_HAVE_UNISTD_H +#include <unistd.h> +#endif + +#include <mpi.h> +#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ +#include <mpio.h> +#endif + +/* Macro definitions */ +/* Verify: + * if val is false (0), print mesg and if fatal is true (non-zero), die. + */ +#define H5FATAL 1 +#define VRFY(val, mesg, fatal) \ + do { \ + if (!val) { \ + printf("Proc %d: ", mynod); \ + printf("*** Assertion failed (%s) at line %4d in %s\n", mesg, (int)__LINE__, __FILE__); \ + if (fatal) { \ + fflush(stdout); \ + goto die_jar_jar_die; \ + } \ + } \ + } while (0) +#define RANK 1 +#define MAX_PATH 1024 + +hsize_t dims[RANK]; /* dataset dim sizes */ +hsize_t block[RANK], stride[RANK], count[RANK]; +hsize_t start[RANK]; +hid_t fid; /* HDF5 file ID */ +hid_t acc_tpl; /* File access templates */ +hid_t sid; /* Dataspace ID */ +hid_t file_dataspace; /* File dataspace ID */ +hid_t mem_dataspace; /* memory dataspace ID */ +hid_t dataset; /* Dataset ID */ +hsize_t opt_alignment = 1; +hsize_t opt_threshold = 1; +int opt_split_vfd = 0; +char * meta_ext, *raw_ext; /* holds the meta and raw file extension if */ + /* opt_split_vfd is set */ + +/* DEFAULT VALUES FOR OPTIONS */ +int64_t opt_block = 1048576 * 16; +int opt_iter = 1; +int opt_stripe = -1; +int opt_correct = 0; +int amode = O_RDWR | O_CREAT; +char opt_file[256] = "perftest.out"; +char opt_pvfstab[256] = "notset"; +int opt_pvfstab_set = 0; + +const char *FILENAME[] = {opt_file, NULL}; + +/* function prototypes */ +static int parse_args(int argc, char **argv); + +#ifndef H5_HAVE_UNISTD_H +/* globals needed for getopt */ +extern char *optarg; +#endif + +#ifndef HDF5_PARAPREFIX +#define HDF5_PARAPREFIX "" +#endif +char * paraprefix = NULL; /* for command line option para-prefix */ +MPI_Info h5_io_info_g = MPI_INFO_NULL; /* MPI INFO object for IO */ + +static char *h5_fixname_real(const char *base_name, hid_t fapl, const char *_suffix, char *fullname, + size_t size, hbool_t nest_printf, hbool_t subst_for_superblock); + +int +main(int argc, char **argv) +{ + char * buf, *tmp, *buf2 = NULL, *tmp2 = NULL, *check; + int i, j, mynod = 0, nprocs = 1, my_correct = 1, correct, myerrno; + double stim, etim; + double write_tim = 0; + double read_tim = 0; + double read_bw, write_bw; + double max_read_tim, max_write_tim; + double min_read_tim, min_write_tim; + double ave_read_tim, ave_write_tim; + int64_t iter_jump = 0; + char filename[MAX_PATH]; + herr_t ret; /* Generic return value */ + + /* startup MPI and determine the rank of this process */ + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &mynod); + + /* parse the command line arguments */ + parse_args(argc, argv); + + if (mynod == 0) + printf("# Using hdf5-io calls.\n"); + +#ifdef H5_HAVE_UNISTD_H + /* Kind of a weird hack- if the location of the pvfstab file was + * specified on the command line, then spit out this location into + * the appropriate environment variable. + */ + if (opt_pvfstab_set) { + if ((setenv("PVFSTAB_FILE", opt_pvfstab, 1)) < 0) { + perror("setenv"); + goto die_jar_jar_die; + } + } +#endif + + /* this is how much of the file data is covered on each iteration of + * the test. used to help determine the seek offset on each + * iteration */ + iter_jump = nprocs * opt_block; + + /* setup a buffer of data to write */ + if (!(tmp = (char *)malloc((size_t)opt_block + 256))) { + perror("malloc"); + goto die_jar_jar_die; + } + buf = tmp + 128 - (((long)tmp) % 128); /* align buffer */ + + if (opt_correct) { + /* do the same buffer setup for verifiable data */ + if (!(tmp2 = (char *)malloc((size_t)opt_block + 256))) { + perror("malloc2"); + goto die_jar_jar_die; + } + buf2 = tmp + 128 - (((long)tmp) % 128); + } + + /* setup file access template with parallel IO access. */ + if (opt_split_vfd) { + hid_t mpio_pl; + + mpio_pl = H5Pcreate(H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "", H5FATAL); + ret = H5Pset_fapl_mpio(mpio_pl, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((ret >= 0), "", H5FATAL); + + /* set optional allocation alignment */ + if (opt_alignment * opt_threshold != 1) { + ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment); + VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL); + } + + /* setup file access template */ + acc_tpl = H5Pcreate(H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "", H5FATAL); + ret = H5Pset_fapl_split(acc_tpl, meta_ext, mpio_pl, raw_ext, mpio_pl); + VRFY((ret >= 0), "H5Pset_fapl_split succeeded", H5FATAL); + ret = H5Pclose(mpio_pl); + VRFY((ret >= 0), "H5Pclose mpio_pl succeeded", H5FATAL); + } + else { + /* setup file access template */ + acc_tpl = H5Pcreate(H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "", H5FATAL); + ret = H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL); + VRFY((ret >= 0), "", H5FATAL); + + /* set optional allocation alignment */ + if (opt_alignment * opt_threshold != 1) { + ret = H5Pset_alignment(acc_tpl, opt_threshold, opt_alignment); + VRFY((ret >= 0), "H5Pset_alignment succeeded", !H5FATAL); + } + } + + h5_fixname_real(FILENAME[0], acc_tpl, NULL, filename, sizeof filename, FALSE, FALSE); + + /* create the parallel file */ + fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl); + VRFY((fid >= 0), "H5Fcreate succeeded", H5FATAL); + + /* define a contiquous dataset of opt_iter*nprocs*opt_block chars */ + dims[0] = (hsize_t)opt_iter * (hsize_t)nprocs * (hsize_t)opt_block; + sid = H5Screate_simple(RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded", H5FATAL); + dataset = H5Dcreate2(fid, "Dataset1", H5T_NATIVE_CHAR, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded", H5FATAL); + + /* create the memory dataspace and the file dataspace */ + dims[0] = (hsize_t)opt_block; + mem_dataspace = H5Screate_simple(RANK, dims, NULL); + VRFY((mem_dataspace >= 0), "", H5FATAL); + file_dataspace = H5Dget_space(dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded", H5FATAL); + + /* now each process writes a block of opt_block chars in round robbin + * fashion until the whole dataset is covered. + */ + for (j = 0; j < opt_iter; j++) { + /* setup a file dataspace selection */ + start[0] = (hsize_t)((j * iter_jump) + (mynod * opt_block)); + stride[0] = block[0] = (hsize_t)opt_block; + count[0] = 1; + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL); + + if (opt_correct) /* fill in buffer for iteration */ { + for (i = mynod + j, check = buf; i < opt_block; i++, check++) + *check = (char)i; + } + + /* discover the starting time of the operation */ + MPI_Barrier(MPI_COMM_WORLD); + stim = MPI_Wtime(); + + /* write data */ + ret = H5Dwrite(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL); + + /* discover the ending time of the operation */ + etim = MPI_Wtime(); + + write_tim += (etim - stim); + + /* we are done with this "write" iteration */ + } + + /* close dataset and file */ + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL); + ret = H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL); + + /* wait for everyone to synchronize at this point */ + MPI_Barrier(MPI_COMM_WORLD); + + /* reopen the file for reading */ + fid = H5Fopen(filename, H5F_ACC_RDONLY, acc_tpl); + VRFY((fid >= 0), "", H5FATAL); + + /* open the dataset */ + dataset = H5Dopen2(fid, "Dataset1", H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen succeeded", H5FATAL); + + /* we can re-use the same mem_dataspace and file_dataspace + * the H5Dwrite used since the dimension size is the same. + */ + + /* we are going to repeat the read the same pattern the write used */ + for (j = 0; j < opt_iter; j++) { + /* setup a file dataspace selection */ + start[0] = (hsize_t)((j * iter_jump) + (mynod * opt_block)); + stride[0] = block[0] = (hsize_t)opt_block; + count[0] = 1; + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded", H5FATAL); + /* seek to the appropriate spot give the current iteration and + * rank within the MPI processes */ + + /* discover the start time */ + MPI_Barrier(MPI_COMM_WORLD); + stim = MPI_Wtime(); + + /* read in the file data */ + if (!opt_correct) { + ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf); + } + else { + ret = H5Dread(dataset, H5T_NATIVE_CHAR, mem_dataspace, file_dataspace, H5P_DEFAULT, buf2); + } + myerrno = errno; + + /* discover the end time */ + etim = MPI_Wtime(); + read_tim += (etim - stim); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded", !H5FATAL); + + if (ret < 0) + HDfprintf(stderr, "node %d, read error, loc = %" PRId64 ": %s\n", mynod, mynod * opt_block, + strerror(myerrno)); + + /* if the user wanted to check correctness, compare the write + * buffer to the read buffer */ + if (opt_correct && memcmp(buf, buf2, (size_t)opt_block)) { + HDfprintf(stderr, "node %d, correctness test failed\n", mynod); + my_correct = 0; + MPI_Allreduce(&my_correct, &correct, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); + } + + /* we are done with this read iteration */ + } + + /* close dataset and file */ + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose succeeded", H5FATAL); + ret = H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded", H5FATAL); + ret = H5Pclose(acc_tpl); + VRFY((ret >= 0), "H5Pclose succeeded", H5FATAL); + + /* compute the read and write times */ + MPI_Allreduce(&read_tim, &max_read_tim, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&read_tim, &min_read_tim, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&read_tim, &ave_read_tim, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + /* calculate the average from the sum */ + ave_read_tim = ave_read_tim / nprocs; + + MPI_Allreduce(&write_tim, &max_write_tim, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); + MPI_Allreduce(&write_tim, &min_write_tim, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); + MPI_Allreduce(&write_tim, &ave_write_tim, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + /* calculate the average from the sum */ + ave_write_tim = ave_write_tim / nprocs; + + /* print out the results on one node */ + if (mynod == 0) { + read_bw = (double)((int64_t)(opt_block * nprocs * opt_iter)) / (max_read_tim * 1000000.0); + write_bw = (double)((int64_t)(opt_block * nprocs * opt_iter)) / (max_write_tim * 1000000.0); + + printf("nr_procs = %d, nr_iter = %d, blk_sz = %ld\n", nprocs, opt_iter, (long)opt_block); + + printf("# total_size = %ld\n", (long)(opt_block * nprocs * opt_iter)); + + printf("# Write: min_time = %f, max_time = %f, mean_time = %f\n", min_write_tim, max_write_tim, + ave_write_tim); + printf("# Read: min_time = %f, max_time = %f, mean_time = %f\n", min_read_tim, max_read_tim, + ave_read_tim); + + printf("Write bandwidth = %f Mbytes/sec\n", write_bw); + printf("Read bandwidth = %f Mbytes/sec\n", read_bw); + + if (opt_correct) { + printf("Correctness test %s.\n", correct ? "passed" : "failed"); + } + } + +die_jar_jar_die: + +#ifdef H5_HAVE_UNISTD + /* Clear the environment variable if it was set earlier */ + if (opt_pvfstab_set) { + unsetenv("PVFSTAB_FILE"); + } +#endif + + free(tmp); + if (opt_correct) + free(tmp2); + + MPI_Finalize(); + + return (0); +} + +static int +parse_args(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "s:b:i:f:p:a:2:c")) != EOF) { + switch (c) { + case 's': /* stripe */ + opt_stripe = atoi(optarg); + break; + case 'b': /* block size */ + opt_block = atoi(optarg); + break; + case 'i': /* iterations */ + opt_iter = atoi(optarg); + break; + case 'f': /* filename */ + strncpy(opt_file, optarg, 255); + FILENAME[0] = opt_file; + break; + case 'p': /* pvfstab file */ + strncpy(opt_pvfstab, optarg, 255); + opt_pvfstab_set = 1; + break; + case 'a': /* aligned allocation. + * syntax: -a<alignment>/<threshold> + * e.g., -a4096/512 allocate at 4096 bytes + * boundary if request size >= 512. + */ + { + char *p; + + opt_alignment = (hsize_t)HDatoi(optarg); + if (NULL != (p = (char *)HDstrchr(optarg, '/'))) + opt_threshold = (hsize_t)HDatoi(p + 1); + } + HDfprintf(stdout, "alignment/threshold=%" PRIuHSIZE "/%" PRIuHSIZE "\n", opt_alignment, + opt_threshold); + break; + case '2': /* use 2-files, i.e., split file driver */ + opt_split_vfd = 1; + /* get meta and raw file extension. */ + /* syntax is <raw_ext>,<meta_ext> */ + meta_ext = raw_ext = optarg; + while (*raw_ext != '\0') { + if (*raw_ext == ',') { + *raw_ext = '\0'; + raw_ext++; + break; + } + raw_ext++; + } + printf("split-file-vfd used: %s,%s\n", meta_ext, raw_ext); + break; + case 'c': /* correctness */ + opt_correct = 1; + break; + case '?': /* unknown */ + default: + break; + } + } + + return (0); +} +/*------------------------------------------------------------------------- + * Function: getenv_all + * + * Purpose: Used to get the environment that the root MPI task has. + * name specifies which environment variable to look for + * val is the string to which the value of that environment + * variable will be copied. + * + * NOTE: The pointer returned by this function is only + * valid until the next call to getenv_all and the data + * stored there must be copied somewhere else before any + * further calls to getenv_all take place. + * + * Return: pointer to a string containing the value of the environment variable + * NULL if the varialbe doesn't exist in task 'root's environment. + * + * Programmer: Leon Arber + * 4/4/05 + * + * Modifications: + * Use original getenv if MPI is not initialized. This happens + * one uses the PHDF5 library to build a serial nature code. + * Albert 2006/04/07 + * + *------------------------------------------------------------------------- + */ +char * +getenv_all(MPI_Comm comm, int root, const char *name) +{ + int mpi_size, mpi_rank, mpi_initialized, mpi_finalized; + int len; + static char *env = NULL; + + HDassert(name); + + MPI_Initialized(&mpi_initialized); + MPI_Finalized(&mpi_finalized); + + if (mpi_initialized && !mpi_finalized) { + MPI_Comm_rank(comm, &mpi_rank); + MPI_Comm_size(comm, &mpi_size); + HDassert(root < mpi_size); + + /* The root task does the getenv call + * and sends the result to the other tasks */ + if (mpi_rank == root) { + env = HDgetenv(name); + if (env) { + len = (int)HDstrlen(env); + MPI_Bcast(&len, 1, MPI_INT, root, comm); + MPI_Bcast(env, len, MPI_CHAR, root, comm); + } + else { + /* len -1 indicates that the variable was not in the environment */ + len = -1; + MPI_Bcast(&len, 1, MPI_INT, root, comm); + } + } + else { + MPI_Bcast(&len, 1, MPI_INT, root, comm); + if (len >= 0) { + if (env == NULL) + env = (char *)HDmalloc((size_t)len + 1); + else if (HDstrlen(env) < (size_t)len) + env = (char *)HDrealloc(env, (size_t)len + 1); + + MPI_Bcast(env, len, MPI_CHAR, root, comm); + env[len] = '\0'; + } + else { + if (env) + HDfree(env); + env = NULL; + } + } +#ifndef NDEBUG + MPI_Barrier(comm); +#endif + } + else { + /* use original getenv */ + if (env) + HDfree(env); + env = HDgetenv(name); + } /* end if */ + + return env; +} + +/*------------------------------------------------------------------------- + * Function: h5_fixname_real + * + * Purpose: Create a file name from a file base name like `test' and + * return it through the FULLNAME (at most SIZE characters + * counting the null terminator). The full name is created by + * prepending the contents of HDF5_PREFIX (separated from the + * base name by a slash) and appending a file extension based on + * the driver supplied, resulting in something like + * `ufs:/u/matzke/test.h5'. + * + * Return: Success: The FULLNAME pointer. + * + * Failure: NULL if BASENAME or FULLNAME is the null + * pointer or if FULLNAME isn't large enough for + * the result. + * + * Programmer: Robb Matzke + * Thursday, November 19, 1998 + * + *------------------------------------------------------------------------- + */ +static char * +h5_fixname_real(const char *base_name, hid_t fapl, const char *_suffix, char *fullname, size_t size, + hbool_t nest_printf, hbool_t subst_for_superblock) +{ + const char *prefix = NULL; + const char *env = NULL; /* HDF5_DRIVER environment variable */ + char * ptr, last = '\0'; + const char *suffix = _suffix; + size_t i, j; + hid_t driver = -1; + int isppdriver = 0; /* if the driver is MPI parallel */ + + if (!base_name || !fullname || size < 1) + return NULL; + + HDmemset(fullname, 0, size); + + /* figure out the suffix */ + if (H5P_DEFAULT != fapl) { + if ((driver = H5Pget_driver(fapl)) < 0) + return NULL; + + if (suffix) { + if (H5FD_FAMILY == driver) { + if (subst_for_superblock) + suffix = "00000.h5"; + else + suffix = nest_printf ? "%%05d.h5" : "%05d.h5"; + } + else if (H5FD_MULTI == driver) { + + /* Get the environment variable, if it exists, in case + * we are using the split driver since both of those + * use the multi VFD under the hood. + */ + env = HDgetenv("HDF5_DRIVER"); +#ifdef HDF5_DRIVER + /* Use the environment variable, then the compile-time constant */ + if (!env) + env = HDF5_DRIVER; +#endif + if (env && !HDstrcmp(env, "split")) { + /* split VFD */ + if (subst_for_superblock) + suffix = "-m.h5"; + else + suffix = NULL; + } + else { + /* multi VFD */ + if (subst_for_superblock) + suffix = "-s.h5"; + else + suffix = NULL; + } + } + } + } + + /* Must first check fapl is not H5P_DEFAULT (-1) because H5FD_XXX + * could be of value -1 if it is not defined. + */ + isppdriver = H5P_DEFAULT != fapl && (H5FD_MPIO == driver); + + /* Check what prefix to use for test files. Process HDF5_PARAPREFIX and + * HDF5_PREFIX. + * Use different ones depending on parallel or serial driver used. + * (The #ifdef is needed to prevent compile failure in case MPI is not + * configured.) + */ + if (isppdriver) { + /* + * For parallel: + * First use command line option, then the environment + * variable, then try the constant + */ + static int explained = 0; + + prefix = (paraprefix ? paraprefix : getenv_all(MPI_COMM_WORLD, 0, "HDF5_PARAPREFIX")); + + if (!prefix && !explained) { + /* print hint by process 0 once. */ + int mpi_rank; + + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + if (mpi_rank == 0) + HDprintf("*** Hint ***\n" + "You can use environment variable HDF5_PARAPREFIX to " + "run parallel test files in a\n" + "different directory or to add file type prefix. e.g.,\n" + " HDF5_PARAPREFIX=pfs:/PFS/user/me\n" + " export HDF5_PARAPREFIX\n" + "*** End of Hint ***\n"); + + explained = TRUE; +#ifdef HDF5_PARAPREFIX + prefix = HDF5_PARAPREFIX; +#endif /* HDF5_PARAPREFIX */ + } + } + else { + /* + * For serial: + * First use the environment variable, then try the constant + */ + prefix = HDgetenv("HDF5_PREFIX"); + +#ifdef HDF5_PREFIX + if (!prefix) + prefix = HDF5_PREFIX; +#endif /* HDF5_PREFIX */ + } + + /* Prepend the prefix value to the base name */ + if (prefix && *prefix) { + if (isppdriver) { + /* This is a parallel system */ + char *subdir; + + if (!HDstrcmp(prefix, HDF5_PARAPREFIX)) { + /* + * If the prefix specifies the HDF5_PARAPREFIX directory, then + * default to using the "/tmp/$USER" or "/tmp/$LOGIN" + * directory instead. + */ + char *user, *login; + + user = HDgetenv("USER"); + login = HDgetenv("LOGIN"); + subdir = (user ? user : login); + + if (subdir) { + for (i = 0; i < size && prefix[i]; i++) + fullname[i] = prefix[i]; + + fullname[i++] = '/'; + + for (j = 0; i < size && subdir[j]; ++i, ++j) + fullname[i] = subdir[j]; + } + } + + if (!fullname[0]) { + /* We didn't append the prefix yet */ + HDstrncpy(fullname, prefix, size); + fullname[size - 1] = '\0'; + } + + if (HDstrlen(fullname) + HDstrlen(base_name) + 1 < size) { + /* + * Append the base_name with a slash first. Multiple + * slashes are handled below. + */ + h5_stat_t buf; + + if (HDstat(fullname, &buf) < 0) + /* The directory doesn't exist just yet */ + if (HDmkdir(fullname, (mode_t)0755) < 0 && errno != EEXIST) + /* + * We couldn't make the "/tmp/${USER,LOGIN}" + * subdirectory. Default to PREFIX's original + * prefix value. + */ + HDstrcpy(fullname, prefix); + + HDstrcat(fullname, "/"); + HDstrcat(fullname, base_name); + } + else { + /* Buffer is too small */ + return NULL; + } + } + else { + if (HDsnprintf(fullname, size, "%s/%s", prefix, base_name) == (int)size) + /* Buffer is too small */ + return NULL; + } + } + else if (HDstrlen(base_name) >= size) { + /* Buffer is too small */ + return NULL; + } + else { + HDstrcpy(fullname, base_name); + } + + /* Append a suffix */ + if (suffix) { + if (HDstrlen(fullname) + HDstrlen(suffix) >= size) + return NULL; + + HDstrcat(fullname, suffix); + } + + /* Remove any double slashes in the filename */ + for (ptr = fullname, i = j = 0; ptr && i < size; i++, ptr++) { + if (*ptr != '/' || last != '/') + fullname[j++] = *ptr; + + last = *ptr; + } + + return fullname; +} + +/* + * Local variables: + * c-indent-level: 3 + * c-basic-offset: 3 + * tab-width: 3 + * End: + */ + +#else /* H5_HAVE_PARALLEL */ +/* dummy program since H5_HAVE_PARALLEL is not configured in */ +int +main(int H5_ATTR_UNUSED argc, char H5_ATTR_UNUSED **argv) +{ + printf("No parallel performance because parallel is not configured in\n"); + return (0); +} +#endif /* H5_HAVE_PARALLEL */ diff --git a/tools/src/h5perf/pio_engine.c b/tools/src/h5perf/pio_engine.c new file mode 100644 index 0000000..cac36d7 --- /dev/null +++ b/tools/src/h5perf/pio_engine.c @@ -0,0 +1,2745 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Author: Albert Cheng of NCSA, Oct 24, 2001. + */ + +#include "hdf5.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef H5_HAVE_UNISTD_H +#include <sys/types.h> +#include <unistd.h> +#endif + +#ifdef H5_HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +#ifdef H5_HAVE_PARALLEL + +#include <mpi.h> + +#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ +#include <mpio.h> +#endif /* !MPI_FILE_NULL */ + +#include "pio_perf.h" + +/* Macro definitions */ + +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 6 +#define H5DCREATE(fd, name, type, space, dcpl) H5Dcreate(fd, name, type, space, dcpl) +#define H5DOPEN(fd, name) H5Dopen(fd, name) +#else +#define H5DCREATE(fd, name, type, space, dcpl) \ + H5Dcreate2(fd, name, type, space, H5P_DEFAULT, dcpl, H5P_DEFAULT) +#define H5DOPEN(fd, name) H5Dopen2(fd, name, H5P_DEFAULT) +#endif + +/* sizes of various items. these sizes won't change during program execution */ +/* The following three must have the same type */ +#define ELMT_H5_TYPE H5T_NATIVE_UCHAR + +#define GOTOERROR(errcode) \ + { \ + ret_code = errcode; \ + goto done; \ + } +#define ERRMSG(mesg) \ + { \ + HDfprintf(stderr, "Proc %d: ", pio_mpi_rank_g); \ + HDfprintf(stderr, "*** Assertion failed (%s) at line %4d in %s\n", mesg, (int)__LINE__, __FILE__); \ + } + +/* verify: if val is false (0), print mesg. */ +#define VRFY(val, mesg) \ + do { \ + if (!val) { \ + ERRMSG(mesg); \ + GOTOERROR(FAIL); \ + } \ + } while (0) + +/* POSIX I/O macros */ +#ifdef H5_HAVE_WIN32_API +/* Can't link against the library, so this test will use the older, non-Unicode + * _open() call on Windows. + */ +#define HDopen(S, F, ...) _open(S, F | _O_BINARY, __VA_ARGS__) +#endif /* H5_HAVE_WIN32_API */ +#define POSIXCREATE(fn) HDopen(fn, O_CREAT | O_TRUNC | O_RDWR, 0600) +#define POSIXOPEN(fn, F) HDopen(fn, F, 0600) +#define POSIXCLOSE(F) HDclose(F) +#define POSIXSEEK(F, L) HDlseek(F, L, SEEK_SET) +#define POSIXWRITE(F, B, S) HDwrite(F, B, S) +#define POSIXREAD(F, B, S) HDread(F, B, S) + +enum { PIO_CREATE = 1, PIO_WRITE = 2, PIO_READ = 4 }; + +/* Global variables */ +static int clean_file_g = -1; /*whether to cleanup temporary test */ +/*files. -1 is not defined; */ +/*0 is no cleanup; 1 is do cleanup */ + +/* + * In a parallel machine, the filesystem suitable for compiling is + * unlikely a parallel file system that is suitable for parallel I/O. + * There is no standard pathname for the parallel file system. /tmp + * is about the best guess. + */ +#ifndef HDF5_PARAPREFIX +#define HDF5_PARAPREFIX "" +#endif /* !HDF5_PARAPREFIX */ + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif /* !MIN */ + +/* the different types of file descriptors we can expect */ +typedef union _file_descr { + int posixfd; /* POSIX file handle*/ + MPI_File mpifd; /* MPI file */ + hid_t h5fd; /* HDF5 file */ +} file_descr; + +/* local functions */ +static char * pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size); +static herr_t do_write(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nelmts, + size_t buf_size, void *buffer); +static herr_t do_read(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nelmts, + size_t buf_size, void *buffer /*out*/); +static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); +static herr_t do_fclose(iotype iot, file_descr *fd); +static void do_cleanupfile(iotype iot, char *fname); +static off_t sqrto(off_t); + +/* + * Function: do_pio + * Purpose: PIO Engine where Parallel IO are executed. + * Return: results + * Programmer: Albert Cheng, Bill Wendling 2001/12/12 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +results +do_pio(parameters param) +{ + /* return codes */ + herr_t ret_code = 0; /*return code */ + results res; + + file_descr fd; + iotype iot; + + char fname[FILENAME_MAX]; + long nf; + long ndsets; + off_t nbytes; /*number of bytes per dataset */ + off_t snbytes; /*general dataset size */ + /*for 1D, it is the actual dataset size */ + /*for 2D, it is the size of a side of the dataset square */ + char * buffer = NULL; /*data buffer pointer */ + size_t buf_size; /*general buffer size in bytes */ + /*for 1D, it is the actual buffer size */ + /*for 2D, it is the length of the buffer rectangle */ + size_t blk_size; /*data block size in bytes */ + size_t bsize; /*actual buffer size */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + + /* Sanity check parameters */ + + /* IO type */ + iot = param.io_type; + + switch (iot) { + case MPIO: + fd.mpifd = MPI_FILE_NULL; + res.timers = io_time_new(MPI_CLOCK); + break; + case POSIXIO: + fd.posixfd = -1; + res.timers = io_time_new(MPI_CLOCK); + break; + case PHDF5: + fd.h5fd = -1; + res.timers = io_time_new(MPI_CLOCK); + break; + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", iot); + GOTOERROR(FAIL); + } + + ndsets = param.num_dsets; /* number of datasets per file */ + nbytes = param.num_bytes; /* number of bytes per dataset */ + buf_size = param.buf_size; + blk_size = param.blk_size; + + if (!param.dim2d) { + snbytes = nbytes; /* General dataset size */ + bsize = buf_size; /* Actual buffer size */ + } + else { + snbytes = sqrto(nbytes); /* General dataset size */ + bsize = buf_size * blk_size; /* Actual buffer size */ + } + + if (param.num_files < 0) { + HDfprintf(stderr, "number of files must be >= 0 (%ld)\n", param.num_files); + GOTOERROR(FAIL); + } + + if (ndsets < 0) { + HDfprintf(stderr, "number of datasets per file must be >= 0 (%ld)\n", ndsets); + GOTOERROR(FAIL); + } + + if (param.num_procs <= 0) { + HDfprintf(stderr, "maximum number of process to use must be > 0 (%d)\n", param.num_procs); + GOTOERROR(FAIL); + } + + /* Validate transfer buffer size & block size*/ + if (blk_size <= 0) { + HDfprintf(stderr, "Transfer block size (%zu) must be > 0\n", blk_size); + GOTOERROR(FAIL); + } + if (buf_size <= 0) { + HDfprintf(stderr, "Transfer buffer size (%zu) must be > 0\n", buf_size); + GOTOERROR(FAIL); + } + if ((buf_size % blk_size) != 0) { + HDfprintf(stderr, + "Transfer buffer size (%zu) must be a multiple of the " + "interleaved I/O block size (%zu)\n", + buf_size, blk_size); + GOTOERROR(FAIL); + } + if ((snbytes % pio_mpi_nprocs_g) != 0) { + HDfprintf(stderr, + "Dataset size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "number of processes (%d)\n", + (long long)snbytes, pio_mpi_nprocs_g); + GOTOERROR(FAIL); + } + + if (!param.dim2d) { + if (((size_t)(snbytes / pio_mpi_nprocs_g) % buf_size) != 0) { + HDfprintf(stderr, + "Dataset size/process (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "trasfer buffer size (%zu)\n", + (long long)(snbytes / pio_mpi_nprocs_g), buf_size); + GOTOERROR(FAIL); + } + } + else { + if (((size_t)snbytes % buf_size) != 0) { + HDfprintf(stderr, + "Dataset side size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "trasfer buffer size (%zu)\n", + (long long)snbytes, buf_size); + GOTOERROR(FAIL); + } + } + + /* Allocate transfer buffer */ + if ((buffer = malloc(bsize)) == NULL) { + HDfprintf(stderr, "malloc for transfer buffer size (%zu) failed\n", bsize); + GOTOERROR(FAIL); + } + + if (pio_debug_level >= 4) { + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + /* output all of the times for all iterations */ + if (myrank == 0) + HDfprintf(output, "Timer details:\n"); + } + + for (nf = 1; nf <= param.num_files; nf++) { + /* + * Write performance measurement + */ + /* Open file for write */ + char base_name[256]; + + HDsprintf(base_name, "#pio_tmp_%lu", nf); + pio_create_filename(iot, base_name, fname, sizeof(fname)); + if (pio_debug_level > 0) + HDfprintf(output, "rank %d: data filename=%s\n", pio_mpi_rank_g, fname); + + /* Need barrier to make sure everyone starts at the same time */ + MPI_Barrier(pio_comm_g); + + io_time_set(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, PIO_CREATE | PIO_WRITE); + + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTART); + hrc = do_write(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); + io_time_set(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTOP); + + VRFY((hrc == SUCCESS), "do_write failed"); + + /* Close file for write */ + hrc = do_fclose(iot, &fd); + + io_time_set(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + + if (!param.h5_write_only) { + /* + * Read performance measurement + */ + /* Need barrier to make sure everyone is done writing and has + * closed the file. Also to make sure everyone starts reading + * at the same time. + */ + MPI_Barrier(pio_comm_g); + + /* Open file for read */ + io_time_set(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, PIO_READ); + + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTART); + hrc = do_read(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); + io_time_set(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_read failed"); + + /* Close file for read */ + hrc = do_fclose(iot, &fd); + + io_time_set(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + } + + /* Need barrier to make sure everyone is done with the file */ + /* before it may be removed by do_cleanupfile */ + MPI_Barrier(pio_comm_g); + do_cleanupfile(iot, fname); + } + +done: + /* clean up */ + /* release HDF5 objects */ + + /* close any opened files */ + /* no remove(fname) because that should have happened normally. */ + switch (iot) { + case POSIXIO: + if (fd.posixfd != -1) + hrc = do_fclose(iot, &fd); + break; + case MPIO: + if (fd.mpifd != MPI_FILE_NULL) + hrc = do_fclose(iot, &fd); + break; + case PHDF5: + if (fd.h5fd != -1) + hrc = do_fclose(iot, &fd); + break; + default: + break; + } + + /* release generic resources */ + if (buffer) + HDfree(buffer); + res.ret_code = ret_code; + return res; +} + +/* + * Function: pio_create_filename + * Purpose: Create a new filename to write to. Determine the correct + * suffix to append to the filename by the type of I/O we're + * doing. Also, place in the /tmp/{$USER,$LOGIN} directory if + * USER or LOGIN are specified in the environment. + * Return: Pointer to filename or NULL + * Programmer: Bill Wendling, 21. November 2001 + * Modifications: + */ +static char * +pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size) +{ + const char *prefix, *suffix = ""; + char * ptr, last = '\0'; + size_t i, j; + + if (!base_name || !fullname || size < 1) + return NULL; + + HDmemset(fullname, 0, size); + + switch (iot) { + case POSIXIO: + suffix = ".posix"; + break; + case MPIO: + suffix = ".mpio"; + break; + case PHDF5: + suffix = ".h5"; + break; + default: + break; + } + + /* First use the environment variable and then try the constant */ + prefix = HDgetenv("HDF5_PARAPREFIX"); + +#ifdef HDF5_PARAPREFIX + if (!prefix) + prefix = HDF5_PARAPREFIX; +#endif /* HDF5_PARAPREFIX */ + + /* Prepend the prefix value to the base name */ + if (prefix && *prefix) { + /* If the prefix specifies the HDF5_PARAPREFIX directory, then + * default to using the "/tmp/$USER" or "/tmp/$LOGIN" + * directory instead. */ + register char *user, *login, *subdir; + + user = HDgetenv("USER"); + login = HDgetenv("LOGIN"); + subdir = (user ? user : login); + + if (subdir) { + for (i = 0; i < size - 1 && prefix[i]; i++) + fullname[i] = prefix[i]; + + fullname[i++] = '/'; + + for (j = 0; i < size && subdir[j]; i++, j++) + fullname[i] = subdir[j]; + } + else { + /* We didn't append the prefix yet */ + HDstrncpy(fullname, prefix, size); + fullname[size - 1] = '\0'; + } + + if ((HDstrlen(fullname) + HDstrlen(base_name) + 1) < size) { + /* Append the base_name with a slash first. Multiple slashes are + * handled below. */ + h5_stat_t buf; + + if (HDstat(fullname, &buf) < 0) + /* The directory doesn't exist just yet */ + if (HDmkdir(fullname, (mode_t)0755) < 0 && errno != EEXIST) { + /* We couldn't make the "/tmp/${USER,LOGIN}" subdirectory. + * Default to PREFIX's original prefix value. */ + HDstrcpy(fullname, prefix); + } + + HDstrcat(fullname, "/"); + HDstrcat(fullname, base_name); + } + else { + /* Buffer is too small */ + return NULL; + } + } + else if (HDstrlen(base_name) >= size) { + /* Buffer is too small */ + return NULL; + } + else { + HDstrcpy(fullname, base_name); + } + + /* Append a suffix */ + if (suffix) { + if (HDstrlen(fullname) + HDstrlen(suffix) >= size) + return NULL; + + HDstrcat(fullname, suffix); + } + + /* Remove any double slashes in the filename */ + for (ptr = fullname, i = j = 0; ptr && i < size; i++, ptr++) { + if (*ptr != '/' || last != '/') + fullname[j++] = *ptr; + + last = *ptr; + } + + return fullname; +} + +/* + * Function: do_write + * Purpose: Write the required amount of data to the file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static herr_t +do_write(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nbytes, size_t buf_size, + void *buffer) +{ + int ret_code = SUCCESS; + int rc; /*routine return code */ + long ndset; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ + char dname[64]; + off_t dset_offset = 0; /*dataset offset in a file */ + off_t bytes_begin[2]; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + off_t snbytes = 0; /*size of a side of the dataset square */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* File offset of the next transfer */ + off_t file_offset_advance; /* File offset advance after each I/O operation */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset; /* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */ + MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */ + MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */ + MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */ + MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */ + MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */ + MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */ + MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */ + MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */ + MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[2]; /*dataset dim sizes */ + hid_t h5dset_space_id = H5I_INVALID_HID; /*dataset space ID */ + hid_t h5mem_space_id = H5I_INVALID_HID; /*memory dataspace ID */ + hid_t h5ds_id = H5I_INVALID_HID; /*dataset handle */ + hsize_t h5block[2]; /*dataspace selection */ + hsize_t h5stride[2]; + hsize_t h5count[2]; + hsize_t h5start[2]; + hssize_t h5offset[2]; /* Selection offset within dataspace */ + hid_t h5dcpl = H5I_INVALID_HID; /* Dataset creation property list */ + hid_t h5dxpl = H5I_INVALID_HID; /* Dataset transfer property list */ + + /* Get the parameters from the parameter block */ + blk_size = parms->blk_size; + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = (off_t)(blk_size * (size_t)pio_mpi_rank_g); + } /* end else */ + + /* Prepare buffer for verifying data */ + if (parms->verify) + memset(buffer, pio_mpi_rank_g + 1, buf_size); + } /* end if */ + /* 2D dataspace */ + else { + /* nbytes is always the number of bytes per dataset (1D or 2D). If the + dataspace is 2D, snbytes is the size of a side of the dataset square. + */ + snbytes = sqrto(nbytes); + + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)((double)snbytes * pio_mpi_rank_g / pio_mpi_nprocs_g); + bytes_begin[1] = 0; + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = 0; + + if (!parms->h5_use_chunks || parms->io_type == PHDF5) + bytes_begin[1] = (off_t)(blk_size * (size_t)pio_mpi_rank_g); + else + bytes_begin[1] = (off_t)(blk_size * blk_size * (size_t)pio_mpi_rank_g); + } /* end else */ + + /* Prepare buffer for verifying data */ + if (parms->verify) + HDmemset(buffer, pio_mpi_rank_g + 1, buf_size * blk_size); + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes * (pio_mpi_rank_g + 1)) / pio_mpi_nprocs_g) - + (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + if (!parms->dim2d) { + HDfprintf(output, + "Debug(do_write): " + "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH + "d\n", + buf_size, (long long)bytes_begin[0], (long long)bytes_count); + } + else { + HDfprintf(output, + "Debug(do_write): " + "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH + "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n", + buf_size * blk_size, (long long)bytes_begin[0], (long long)bytes_begin[1], + (long long)bytes_count); + } + } + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)pio_mpi_nprocs_g, mpi_blk_type, + &mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end if */ + /* 2D dataspace */ + else { + /* Build partial buffer derived type for contiguous access */ + + mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE, &mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build contiguous file's derived type */ + mrc = MPI_Type_vector((int)blk_size, (int)1, (int)((size_t)snbytes / buf_size), + mpi_partial_buffer_cont, &mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit contiguous file type */ + mrc = MPI_Type_commit(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build partial buffer derived type for interleaved access */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build interleaved file's derived type */ + mrc = MPI_Type_vector((int)buf_size, (int)1, (int)((size_t)snbytes / blk_size), + mpi_partial_buffer_inter, &mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit interleaved file type */ + mrc = MPI_Type_commit(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full buffer derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * buf_size), MPI_BYTE, &mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full buffer derived type */ + mrc = MPI_Type_commit(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full chunk derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * blk_size), MPI_BYTE, &mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full chunk derived type */ + mrc = MPI_Type_commit(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build chunk interleaved file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)((size_t)snbytes / blk_size), + mpi_full_chunk, &mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit chunk interleaved file type */ + mrc = MPI_Type_commit(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + } /* end else */ + break; + + case PHDF5: /* HDF5 setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)snbytes; + h5dims[1] = (hsize_t)snbytes; + h5dset_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = 1; + h5stride[1] = h5block[0] = h5block[1] = blk_size; + h5count[0] = 1; + h5count[1] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = blk_size; + h5stride[1] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = h5block[1] = blk_size; + h5count[0] = buf_size / blk_size; + h5count[1] = 1; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + if (!parms->interleaved) { + h5dims[0] = blk_size; + h5dims[1] = buf_size; + } + else { + h5dims[0] = buf_size; + h5dims[1] = blk_size; + } + h5mem_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if (parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + break; + + default: + break; + } /* end switch */ + + for (ndset = 1; ndset <= ndsets; ++ndset) { + + /* Calculate dataset offset within a file */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + case MPIO: + /* both posix and mpi io just need dataset offset in file*/ + dset_offset = (ndset - 1) * nbytes; + break; + + case PHDF5: + h5dcpl = H5Pcreate(H5P_DATASET_CREATE); + if (h5dcpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + /* 1D dataspace */ + if (!parms->dim2d) { + /* Make the dataset chunked if asked */ + if (parms->h5_use_chunks) { + /* Set the chunk size to be the same as the buffer size */ + h5dims[0] = blk_size; + hrc = H5Pset_chunk(h5dcpl, 1, h5dims); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + } /* end if */ + else { + /* 2D dataspace */ + if (parms->h5_use_chunks) { + /* Set the chunk size to be the same as the block size */ + h5dims[0] = blk_size; + h5dims[1] = blk_size; + hrc = H5Pset_chunk(h5dcpl, 2, h5dims); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + } /* end else */ + + HDsprintf(dname, "Dataset_%ld", ndset); + h5ds_id = H5DCREATE(fd->h5fd, dname, ELMT_H5_TYPE, h5dset_space_id, h5dcpl); + + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset Create failed\n"); + GOTOERROR(FAIL); + } + + hrc = H5Pclose(h5dcpl); + /* verifying the close of the dcpl */ + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + break; + } + + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin + * fashion, according to number of process. Otherwise, select + * all bytes_count in contiguous. + */ + nbytes_xfer = 0; + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]); + } /* end if */ + else { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]); + } /* end else */ + + /* Start "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTART); + + while (nbytes_xfer < bytes_count) { + /* Write */ + /* Calculate offset of write within a dataset/file */ + switch (parms->io_type) { + case POSIXIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)buf_size == POSIXWRITE(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + (off_t)(nbytes_xfer * pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)blk_size == POSIXWRITE(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / blk_size) % (size_t)snbytes)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute file offset */ + file_offset = + posix_file_offset + + (off_t)(((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = 0; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute file offset */ + /* Before simplification */ + /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size + *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size)) + *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */ + + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */ + file_offset_advance = (off_t)snbytes * (off_t)blk_size; + } /* end else */ + } /* end else */ + + /* Common code for file access */ + + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to write */ + while (nbytes_toxfer > 0) { + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)nbytes_xfer_advance == + POSIXWRITE(fd->posixfd, buf_p, nbytes_xfer_advance)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance file offset */ + file_offset += file_offset_advance; + } /* end while */ + + } /* end else */ + + break; + + case MPIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Independent file access */ + if (!parms->collective) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent write */ + mrc = + MPI_File_write_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p, (int)1, + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* Collective file access */ + else { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent write */ + mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, mpi_file_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform write */ + mrc = MPI_File_write_at_all(fd->mpifd, 0, buffer, (int)(buf_size / blk_size), + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end else */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes)) + + (MPI_Offset)(((size_t)nbytes_xfer / blk_size) % (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_cont_type; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)( + ((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)( + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_inter_type; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = 0; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_full_buffer; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute offset in file */ + /* Before simplification */ + /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))* + (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+ + ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes + /blk_size*(blk_size*blk_size)); */ + mpi_offset = mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */ + mpi_offset_advance = (MPI_Offset)((size_t)snbytes * blk_size); + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_chunk_inter_type; + } /* end else */ + } /* end else */ + + /* Common code for independent file access */ + if (!parms->collective) { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to write */ + while (nbytes_toxfer > 0) { + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p, + (int)nbytes_xfer_advance, MPI_BYTE, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance global offset in dataset */ + mpi_offset += mpi_offset_advance; + } /* end while */ + } /* end if */ + + /* Common code for collective file access */ + else { + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE, mpi_collective_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform write */ + MPI_File_write_at_all(fd->mpifd, 0, buffer, (int)(buf_size * blk_size), MPI_BYTE, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + } /* end else */ + + } /* end else */ + + break; + + case PHDF5: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer * pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = + H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dwrite"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* 2D dataspace */ + else { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = + (hssize_t)(((size_t)nbytes_xfer / ((size_t)snbytes * blk_size)) * blk_size); + h5offset[1] = + (hssize_t)(((size_t)nbytes_xfer % ((size_t)snbytes * blk_size)) / blk_size); + + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * buf_size)) * + buf_size); + h5offset[1] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * buf_size)) / + buf_size); + + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = + H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dwrite"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + + } /* end else */ + + break; + + default: + break; + } /* switch (parms->io_type) */ + } /* end while */ + + /* Stop "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTOP); + + /* Calculate write time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == PHDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + } /* end for */ + +done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* 1D dataspace */ + if (!parms->dim2d) { + /* Free file type */ + mrc = MPI_Type_free(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* 2D dataspace */ + else { + /* Free partial buffer type for contiguous access */ + mrc = MPI_Type_free(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free contiguous file type */ + mrc = MPI_Type_free(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free partial buffer type for interleaved access */ + mrc = MPI_Type_free(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free interleaved file type */ + mrc = MPI_Type_free(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full buffer type */ + mrc = MPI_Type_free(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full chunk type */ + mrc = MPI_Type_free(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free chunk interleaved file type */ + mrc = MPI_Type_free(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end else */ + } /* end if */ + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + return ret_code; +} + +static off_t +sqrto(off_t x) +{ + double root_x = sqrt((double)x); + return (off_t)root_x; +} + +/* + * Function: do_read + * Purpose: read the required amount of data from the file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng 2001/12/13 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static herr_t +do_read(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nbytes, size_t buf_size, + void *buffer /*out*/) +{ + int ret_code = SUCCESS; + int rc; /*routine return code */ + long ndset; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + size_t bsize; /* Size of the actual buffer */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ + char dname[64]; + off_t dset_offset = 0; /*dataset offset in a file */ + off_t bytes_begin[2]; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + off_t snbytes = 0; /*size of a side of the dataset square */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* File offset of the next transfer */ + off_t file_offset_advance; /* File offset advance after each I/O operation */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset; /* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */ + MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */ + MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */ + MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */ + MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */ + MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */ + MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */ + MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */ + MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */ + MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[2]; /*dataset dim sizes */ + hid_t h5dset_space_id = H5I_INVALID_HID; /*dataset space ID */ + hid_t h5mem_space_id = H5I_INVALID_HID; /*memory dataspace ID */ + hid_t h5ds_id = H5I_INVALID_HID; /*dataset handle */ + hsize_t h5block[2]; /*dataspace selection */ + hsize_t h5stride[2]; + hsize_t h5count[2]; + hsize_t h5start[2]; + hssize_t h5offset[2]; /* Selection offset within dataspace */ + hid_t h5dxpl = H5I_INVALID_HID; /* Dataset transfer property list */ + + /* Get the parameters from the parameter block */ + blk_size = parms->blk_size; + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + + /* 1D dataspace */ + if (!parms->dim2d) { + bsize = buf_size; + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = (off_t)blk_size * (off_t)pio_mpi_rank_g; + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* nbytes is always the number of bytes per dataset (1D or 2D). If the + dataspace is 2D, snbytes is the size of a side of the 'dataset square'. + */ + snbytes = sqrto(nbytes); + + bsize = buf_size * blk_size; + + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)((double)snbytes * pio_mpi_rank_g / pio_mpi_nprocs_g); + bytes_begin[1] = 0; + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = 0; + + if (!parms->h5_use_chunks || parms->io_type == PHDF5) + bytes_begin[1] = (off_t)blk_size * (off_t)pio_mpi_rank_g; + else + bytes_begin[1] = (off_t)blk_size * (off_t)blk_size * (off_t)pio_mpi_rank_g; + } /* end else */ + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes * (pio_mpi_rank_g + 1)) / pio_mpi_nprocs_g) - + (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + if (!parms->dim2d) { + HDfprintf(output, + "Debug(do_write): " + "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH + "d\n", + buf_size, (long long)bytes_begin[0], (long long)bytes_count); + } + else { + HDfprintf(output, + "Debug(do_write): " + "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH + "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n", + buf_size * blk_size, (long long)bytes_begin[0], (long long)bytes_begin[1], + (long long)bytes_count); + } + } + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)pio_mpi_nprocs_g, mpi_blk_type, + &mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end if */ + /* 2D dataspace */ + else { + /* Build partial buffer derived type for contiguous access */ + mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE, &mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build contiguous file's derived type */ + mrc = MPI_Type_vector((int)blk_size, (int)1, (int)((size_t)snbytes / buf_size), + mpi_partial_buffer_cont, &mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit contiguous file type */ + mrc = MPI_Type_commit(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build partial buffer derived type for interleaved access */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build interleaved file's derived type */ + mrc = MPI_Type_vector((int)buf_size, (int)1, (int)((size_t)snbytes / blk_size), + mpi_partial_buffer_inter, &mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit interleaved file type */ + mrc = MPI_Type_commit(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full buffer derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * buf_size), MPI_BYTE, &mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full buffer derived type */ + mrc = MPI_Type_commit(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full chunk derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * blk_size), MPI_BYTE, &mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full chunk derived type */ + mrc = MPI_Type_commit(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build chunk interleaved file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)((size_t)snbytes / blk_size), + mpi_full_chunk, &mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit chunk interleaved file type */ + mrc = MPI_Type_commit(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end else */ + break; + + case PHDF5: /* HDF5 setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)snbytes; + h5dims[1] = (hsize_t)snbytes; + h5dset_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = 1; + h5stride[1] = h5block[0] = h5block[1] = blk_size; + h5count[0] = 1; + h5count[1] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = blk_size; + h5stride[1] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = h5block[1] = blk_size; + h5count[0] = buf_size / blk_size; + h5count[1] = 1; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + if (!parms->interleaved) { + h5dims[0] = blk_size; + h5dims[1] = buf_size; + } + else { + h5dims[0] = buf_size; + h5dims[1] = blk_size; + } + h5mem_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if (parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + break; + + default: + break; + } /* end switch */ + + for (ndset = 1; ndset <= ndsets; ++ndset) { + + /* Calculate dataset offset within a file */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + case MPIO: + /* both posix and mpi io just need dataset offset in file*/ + dset_offset = (ndset - 1) * nbytes; + break; + + case PHDF5: + HDsprintf(dname, "Dataset_%ld", ndset); + h5ds_id = H5DOPEN(fd->h5fd, dname); + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset open failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + break; + } + + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin + * fashion, according to number of process. Otherwise, select + * all bytes_count in contiguous. + */ + nbytes_xfer = 0; + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]); + } /* end if */ + else { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]); + } /* end else */ + + /* Start "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTART); + + while (nbytes_xfer < bytes_count) { + /* Read */ + /* Calculate offset of read within a dataset/file */ + switch (parms->io_type) { + case POSIXIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)buf_size == POSIXREAD(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + (off_t)(nbytes_xfer * pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)blk_size == POSIXREAD(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / blk_size) % (size_t)snbytes)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute file offset */ + file_offset = + posix_file_offset + + (off_t)(((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = 0; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute file offset */ + /* Before simplification */ + /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size + *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size)) + *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */ + + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */ + file_offset_advance = (off_t)((size_t)snbytes * blk_size); + } /* end else */ + } /* end else */ + + /* Common code for file access */ + + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to read */ + while (nbytes_toxfer > 0) { + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)nbytes_xfer_advance == + POSIXREAD(fd->posixfd, buf_p, nbytes_xfer_advance)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance file offset */ + file_offset += file_offset_advance; + } /* end while */ + + } /* end else */ + break; + + case MPIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Independent file access */ + if (!parms->collective) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p, (int)1, mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* Collective file access */ + else { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, mpi_file_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, 0, buffer, (int)(buf_size / blk_size), + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end else */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes)) + + (MPI_Offset)(((size_t)nbytes_xfer / blk_size) % (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_cont_type; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)( + ((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)( + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_inter_type; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = 0; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_full_buffer; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute offset in file */ + /* Before simplification */ + /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))* + (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+ + ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes + /blk_size*(blk_size*blk_size)); */ + mpi_offset = mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */ + mpi_offset_advance = (MPI_Offset)((size_t)snbytes * blk_size); + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_chunk_inter_type; + } /* end else */ + } /* end else */ + + /* Common code for independent file access */ + if (!parms->collective) { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to read */ + while (nbytes_toxfer > 0) { + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p, (int)nbytes_xfer_advance, + MPI_BYTE, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance global offset in dataset */ + mpi_offset += mpi_offset_advance; + } /* end while */ + } /* end if */ + + /* Common code for collective file access */ + else { + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE, mpi_collective_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform read */ + MPI_File_read_at_all(fd->mpifd, 0, buffer, (int)(buf_size * blk_size), MPI_BYTE, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + } /* end else */ + + } /* end else */ + break; + + case PHDF5: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer * pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Read the buffer in */ + hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dread"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* 2D dataspace */ + else { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = + (hssize_t)(((size_t)nbytes_xfer / ((size_t)snbytes * blk_size)) * blk_size); + h5offset[1] = + (hssize_t)(((size_t)nbytes_xfer % ((size_t)snbytes * blk_size)) / blk_size); + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * buf_size)) * + buf_size); + h5offset[1] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * buf_size)) / + buf_size); + + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dread"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + + } /* end else */ + break; + + default: + break; + } /* switch (parms->io_type) */ + + /* Verify raw data, if asked */ + if (parms->verify) { + /* Verify data read */ + unsigned char *ucharptr = (unsigned char *)buffer; + size_t i; + int nerror = 0; + + for (i = 0; i < bsize; ++i) { + if (*ucharptr++ != pio_mpi_rank_g + 1) { + if (++nerror < 20) { + /* report at most 20 errors */ + HDprint_rank(output); + HDfprintf(output, + "read data error, expected (%d), " + "got (%d)\n", + pio_mpi_rank_g + 1, (int)*(ucharptr - 1)); + } /* end if */ + } /* end if */ + } /* end for */ + if (nerror >= 20) { + HDprint_rank(output); + HDfprintf(output, "..."); + HDfprintf(output, "total read data errors=%d\n", nerror); + } /* end if */ + } /* if (parms->verify) */ + + } /* end while */ + + /* Stop "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTOP); + + /* Calculate read time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == PHDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + } /* end for */ + +done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* 1D dataspace */ + if (!parms->dim2d) { + /* Free file type */ + mrc = MPI_Type_free(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* 2D dataspace */ + else { + /* Free partial buffer type for contiguous access */ + mrc = MPI_Type_free(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free contiguous file type */ + mrc = MPI_Type_free(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free partial buffer type for interleaved access */ + mrc = MPI_Type_free(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free interleaved file type */ + mrc = MPI_Type_free(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full buffer type */ + mrc = MPI_Type_free(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full chunk type */ + mrc = MPI_Type_free(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free chunk interleaved file type */ + mrc = MPI_Type_free(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end else */ + } /* end if */ + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + return ret_code; +} + +/* + * Function: do_fopen + * Purpose: Open the specified file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + */ +static herr_t +do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags) +{ + int ret_code = SUCCESS, mrc; + hid_t acc_tpl = H5I_INVALID_HID; /* file access templates */ + + switch (param->io_type) { + case POSIXIO: + if (flags & (PIO_CREATE | PIO_WRITE)) + fd->posixfd = POSIXCREATE(fname); + else + fd->posixfd = POSIXOPEN(fname, O_RDONLY); + + if (fd->posixfd < 0) { + HDfprintf(stderr, "POSIX File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /* The perils of POSIX I/O in a parallel environment. The problem is: + * + * - Process n opens a file with truncation and then starts + * writing to the file. + * - Process m also opens the file with truncation, but after + * process n has already started to write to the file. Thus, + * all of the stuff process n wrote is now lost. + */ + MPI_Barrier(pio_comm_g); + + break; + + case MPIO: + if (flags & (PIO_CREATE | PIO_WRITE)) { + MPI_File_delete(fname, h5_io_info_g); + mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_CREATE | MPI_MODE_RDWR, h5_io_info_g, + &fd->mpifd); + + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /*since MPI_File_open with MPI_MODE_CREATE does not truncate */ + /*filesize , set size to 0 explicitedly. */ + mrc = MPI_File_set_size(fd->mpifd, (MPI_Offset)0); + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI_File_set_size failed\n"); + GOTOERROR(FAIL); + } + } + else { + mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_RDONLY, h5_io_info_g, &fd->mpifd); + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + } + + break; + + case PHDF5: + if ((acc_tpl = H5Pcreate(H5P_FILE_ACCESS)) < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Set the file driver to the MPI-IO driver */ + if (H5Pset_fapl_mpio(acc_tpl, pio_comm_g, h5_io_info_g) < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } + + /* Set the alignment of objects in HDF5 file */ + if (H5Pset_alignment(acc_tpl, param->h5_thresh, param->h5_align) < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } + + /* create the parallel file */ + if (flags & (PIO_CREATE | PIO_WRITE)) + fd->h5fd = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl); + else + fd->h5fd = H5Fopen(fname, H5F_ACC_RDONLY, acc_tpl); + if (fd->h5fd < 0) { + HDfprintf(stderr, "HDF5 File Create failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /* verifying the close of the acc_tpl */ + if (H5Pclose(acc_tpl) < 0) { + HDfprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + + break; + + default: + break; + } + +done: + return ret_code; +} + +/* + * Function: do_fclose + * Purpose: Close the specified file descriptor. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + */ +static herr_t +do_fclose(iotype iot, file_descr *fd /*out*/) +{ + herr_t ret_code = SUCCESS, hrc; + int mrc = 0, rc = 0; + + switch (iot) { + case POSIXIO: + rc = POSIXCLOSE(fd->posixfd); + + if (rc != 0) { + HDfprintf(stderr, "POSIX File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->posixfd = -1; + break; + + case MPIO: + mrc = MPI_File_close(&fd->mpifd); + + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File close failed\n"); + GOTOERROR(FAIL); + } + + fd->mpifd = MPI_FILE_NULL; + break; + + case PHDF5: + hrc = H5Fclose(fd->h5fd); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->h5fd = -1; + break; + + default: + break; + } + +done: + return ret_code; +} + +/* + * Function: do_fclose + * Purpose: Cleanup temporary file unless HDF5_NOCLEANUP is set. + * Only Proc 0 of the PIO communicator will do the cleanup. + * Other processes just return. + * Return: void + * Programmer: Albert Cheng 2001/12/12 + * Modifications: + */ +static void +do_cleanupfile(iotype iot, char *fname) +{ + if (pio_mpi_rank_g != 0) + return; + + if (clean_file_g == -1) + clean_file_g = (getenv("HDF5_NOCLEANUP") == NULL) ? 1 : 0; + + if (clean_file_g) { + switch (iot) { + case POSIXIO: + HDremove(fname); + break; + case MPIO: + case PHDF5: + MPI_File_delete(fname, h5_io_info_g); + break; + default: + break; + } + } +} + +#ifdef TIME_MPI +/* instrument the MPI_File_wrirte_xxx and read_xxx calls to measure + * pure time spent in MPI_File code. + */ +int +MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_READ, TSTART); + err = PMPI_File_read_at(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_READ, TSTOP); + return err; +} + +int +MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_READ, TSTART); + err = PMPI_File_read_at_all(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_READ, TSTOP); + return err; +} + +int +MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_WRITE, TSTART); + err = PMPI_File_write_at(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_WRITE, TSTOP); + return err; +} + +int +MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_WRITE, TSTART); + err = PMPI_File_write_at_all(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_WRITE, TSTOP); + return err; +} + +#endif /* TIME_MPI */ +#endif /* H5_HAVE_PARALLEL */ diff --git a/tools/src/h5perf/pio_perf.c b/tools/src/h5perf/pio_perf.c new file mode 100644 index 0000000..96cba2d --- /dev/null +++ b/tools/src/h5perf/pio_perf.c @@ -0,0 +1,1834 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Parallel HDF5 Performance Testing Code + * -------------------------------------- + * + * Portable code to test performance on the different platforms we support. + * This is what the report should look like: + * + * nprocs = Max#Procs + * IO API = POSIXIO + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * IO API = MPIO + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * IO API = PHDF5 + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * nprocs = Max#Procs / 2 + * + * . . . + * + */ + +/* system header files */ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "hdf5.h" + +#ifdef H5_HAVE_PARALLEL + +/* library header files */ +#include <mpi.h> + +/* our header files */ +#include "pio_perf.h" + +/* useful macros */ +#define TAB_SPACE 4 + +#define ONE_KB 1024 +#define ONE_MB (ONE_KB * ONE_KB) +#define ONE_GB (ONE_MB * ONE_KB) + +#define PIO_POSIX 0x1 +#define PIO_MPI 0x2 +#define PIO_HDF5 0x4 + +#ifdef STANDALONE +#define DBL_EPSILON 2.2204460492503131e-16 +#define H5_DBL_ABS_EQUAL(X, Y) (fabs((X) - (Y)) < DBL_EPSILON) +#endif + +/* report 0.0 in case t is zero too */ +#define MB_PER_SEC(bytes, t) (H5_DBL_ABS_EQUAL((t), 0.0) ? 0.0 : ((((double)bytes) / ONE_MB) / (t))) + +#ifndef TRUE +#define TRUE 1 +#endif /* TRUE */ +#ifndef FALSE +#define FALSE (!TRUE) +#endif /* FALSE */ + +/* global variables */ +FILE * output; /* output file */ +int comm_world_rank_g; /* my rank in MPI_COMM_RANK */ +int comm_world_nprocs_g; /* num. of processes of MPI_COMM_WORLD */ +MPI_Comm pio_comm_g; /* Communicator to run the PIO */ +int pio_mpi_rank_g; /* MPI rank of pio_comm_g */ +int pio_mpi_nprocs_g; /* Number of processes of pio_comm_g */ +int pio_debug_level = 0; /* The debug level: + * 0 - Off + * 1 - Minimal + * 2 - Some more + * 3 - Maximal + * 4 - Maximal & then some + */ + +/* local variables */ +static const char *progname = "h5perf"; + +#ifndef HDF5_PARAPREFIX +#define HDF5_PARAPREFIX "" +#endif +char * paraprefix = NULL; /* for command line option para-prefix */ +MPI_Info h5_io_info_g = MPI_INFO_NULL; /* MPI INFO object for IO */ + +/* + * Command-line options: The user can specify short or long-named + * parameters. The long-named ones can be partially spelled. When + * adding more, make sure that they don't clash with each other. + */ +#if 1 +static const char *s_opts = "a:A:B:cCd:D:e:F:ghi:Imno:p:P:stT:wx:X:"; +#else +static const char *s_opts = "a:A:bB:cCd:D:e:F:ghi:Imno:p:P:stT:wx:X:"; +#endif /* 1 */ +static struct h5_long_options l_opts[] = {{"align", require_arg, 'a'}, + {"alig", require_arg, 'a'}, + {"ali", require_arg, 'a'}, + {"al", require_arg, 'a'}, + {"api", require_arg, 'A'}, + {"ap", require_arg, 'A'}, +#if 0 + /* a sighting of the elusive binary option */ + { "binary", no_arg, 'b' }, + { "binar", no_arg, 'b' }, + { "bina", no_arg, 'b' }, + { "bin", no_arg, 'b' }, + { "bi", no_arg, 'b' }, +#endif /* 0 */ + {"block-size", require_arg, 'B'}, + {"block-siz", require_arg, 'B'}, + {"block-si", require_arg, 'B'}, + {"block-s", require_arg, 'B'}, + {"block-", require_arg, 'B'}, + {"block", require_arg, 'B'}, + {"bloc", require_arg, 'B'}, + {"blo", require_arg, 'B'}, + {"bl", require_arg, 'B'}, + {"chunk", no_arg, 'c'}, + {"chun", no_arg, 'c'}, + {"chu", no_arg, 'c'}, + {"ch", no_arg, 'c'}, + {"collective", no_arg, 'C'}, + {"collectiv", no_arg, 'C'}, + {"collecti", no_arg, 'C'}, + {"collect", no_arg, 'C'}, + {"collec", no_arg, 'C'}, + {"colle", no_arg, 'C'}, + {"coll", no_arg, 'C'}, + {"col", no_arg, 'C'}, + {"co", no_arg, 'C'}, + {"debug", require_arg, 'D'}, + {"debu", require_arg, 'D'}, + {"deb", require_arg, 'D'}, + {"de", require_arg, 'D'}, + {"geometry", no_arg, 'g'}, + {"geometr", no_arg, 'g'}, + {"geomet", no_arg, 'g'}, + {"geome", no_arg, 'g'}, + {"geom", no_arg, 'g'}, + {"geo", no_arg, 'g'}, + {"ge", no_arg, 'g'}, + {"help", no_arg, 'h'}, + {"hel", no_arg, 'h'}, + {"he", no_arg, 'h'}, + {"interleaved", require_arg, 'I'}, + {"interleave", require_arg, 'I'}, + {"interleav", require_arg, 'I'}, + {"interlea", require_arg, 'I'}, + {"interle", require_arg, 'I'}, + {"interl", require_arg, 'I'}, + {"inter", require_arg, 'I'}, + {"inte", require_arg, 'I'}, + {"int", require_arg, 'I'}, + {"in", require_arg, 'I'}, + {"max-num-processes", require_arg, 'P'}, + {"max-num-processe", require_arg, 'P'}, + {"max-num-process", require_arg, 'P'}, + {"max-num-proces", require_arg, 'P'}, + {"max-num-proce", require_arg, 'P'}, + {"max-num-proc", require_arg, 'P'}, + {"max-num-pro", require_arg, 'P'}, + {"max-num-pr", require_arg, 'P'}, + {"max-num-p", require_arg, 'P'}, + {"min-num-processes", require_arg, 'p'}, + {"min-num-processe", require_arg, 'p'}, + {"min-num-process", require_arg, 'p'}, + {"min-num-proces", require_arg, 'p'}, + {"min-num-proce", require_arg, 'p'}, + {"min-num-proc", require_arg, 'p'}, + {"min-num-pro", require_arg, 'p'}, + {"min-num-pr", require_arg, 'p'}, + {"min-num-p", require_arg, 'p'}, + {"max-xfer-size", require_arg, 'X'}, + {"max-xfer-siz", require_arg, 'X'}, + {"max-xfer-si", require_arg, 'X'}, + {"max-xfer-s", require_arg, 'X'}, + {"max-xfer", require_arg, 'X'}, + {"max-xfe", require_arg, 'X'}, + {"max-xf", require_arg, 'X'}, + {"max-x", require_arg, 'X'}, + {"min-xfer-size", require_arg, 'x'}, + {"min-xfer-siz", require_arg, 'x'}, + {"min-xfer-si", require_arg, 'x'}, + {"min-xfer-s", require_arg, 'x'}, + {"min-xfer", require_arg, 'x'}, + {"min-xfe", require_arg, 'x'}, + {"min-xf", require_arg, 'x'}, + {"min-x", require_arg, 'x'}, + {"num-bytes", require_arg, 'e'}, + {"num-byte", require_arg, 'e'}, + {"num-byt", require_arg, 'e'}, + {"num-by", require_arg, 'e'}, + {"num-b", require_arg, 'e'}, + {"num-dsets", require_arg, 'd'}, + {"num-dset", require_arg, 'd'}, + {"num-dse", require_arg, 'd'}, + {"num-ds", require_arg, 'd'}, + {"num-d", require_arg, 'd'}, + {"num-files", require_arg, 'F'}, + {"num-file", require_arg, 'F'}, + {"num-fil", require_arg, 'F'}, + {"num-fi", require_arg, 'F'}, + {"num-f", require_arg, 'F'}, + {"num-iterations", require_arg, 'i'}, + {"num-iteration", require_arg, 'i'}, + {"num-iteratio", require_arg, 'i'}, + {"num-iterati", require_arg, 'i'}, + {"num-iterat", require_arg, 'i'}, + {"num-itera", require_arg, 'i'}, + {"num-iter", require_arg, 'i'}, + {"num-ite", require_arg, 'i'}, + {"num-it", require_arg, 'i'}, + {"num-i", require_arg, 'i'}, + {"output", require_arg, 'o'}, + {"outpu", require_arg, 'o'}, + {"outp", require_arg, 'o'}, + {"out", require_arg, 'o'}, + {"ou", require_arg, 'o'}, + {"threshold", require_arg, 'T'}, + {"threshol", require_arg, 'T'}, + {"thresho", require_arg, 'T'}, + {"thresh", require_arg, 'T'}, + {"thres", require_arg, 'T'}, + {"thre", require_arg, 'T'}, + {"thr", require_arg, 'T'}, + {"th", require_arg, 'T'}, + {"write-only", require_arg, 'w'}, + {"write-onl", require_arg, 'w'}, + {"write-on", require_arg, 'w'}, + {"write-o", require_arg, 'w'}, + {"write", require_arg, 'w'}, + {"writ", require_arg, 'w'}, + {"wri", require_arg, 'w'}, + {"wr", require_arg, 'w'}, + {NULL, 0, '\0'}}; + +struct options { + long io_types; /* bitmask of which I/O types to test */ + const char *output_file; /* file to print report to */ + long num_dsets; /* number of datasets */ + long num_files; /* number of files */ + off_t num_bpp; /* number of bytes per proc per dset */ + int num_iters; /* number of iterations */ + int max_num_procs; /* maximum number of processes to use */ + int min_num_procs; /* minimum number of processes to use */ + size_t max_xfer_size; /* maximum transfer buffer size */ + size_t min_xfer_size; /* minimum transfer buffer size */ + size_t blk_size; /* Block size */ + unsigned interleaved; /* Interleaved vs. contiguous blocks */ + unsigned collective; /* Collective vs. independent I/O */ + unsigned dim2d; /* 1D vs. 2D geometry */ + int print_times; /* print times as well as throughputs */ + int print_raw; /* print raw data throughput info */ + off_t h5_alignment; /* alignment in HDF5 file */ + off_t h5_threshold; /* threshold for alignment in HDF5 file */ + int h5_use_chunks; /* Make HDF5 dataset chunked */ + int h5_write_only; /* Perform the write tests only */ + int verify; /* Verify data correctness */ +}; + +typedef struct _minmax { + double min; + double max; + double sum; + int num; +} minmax; + +/* local functions */ +static off_t parse_size_directive(const char *size); +static struct options *parse_command_line(int argc, char *argv[]); +static void run_test_loop(struct options *options); +static int run_test(iotype iot, parameters parms, struct options *opts); +static void output_all_info(minmax *mm, int count, int indent_level); +static void get_minmax(minmax *mm, double val); +static minmax accumulate_minmax_stuff(minmax *mm, int count); +static int create_comm_world(int num_procs, int *doing_pio); +static int destroy_comm_world(void); +static void output_results(const struct options *options, const char *name, minmax *table, int table_size, + off_t data_size); +static void output_times(const struct options *options, const char *name, minmax *table, int table_size); +static void output_report(const char *fmt, ...); +static void print_indent(register int indent); +static void usage(const char *prog); +static void report_parameters(struct options *opts); +static off_t squareo(off_t); + +/* + * Function: main + * Purpose: Start things up. Initialize MPI and then call the test looping + * function. + * Return: EXIT_SUCCESS or EXIT_FAILURE + * Programmer: Bill Wendling, 30. October 2001 + * Modifications: + */ +int +main(int argc, char *argv[]) +{ + int ret; + int exit_value = EXIT_SUCCESS; + struct options *opts = NULL; + +#ifndef STANDALONE + /* Initialize h5tools lib */ + h5tools_init(); +#endif + + output = stdout; + + /* initialize MPI and get the maximum num of processors we started with */ + MPI_Init(&argc, &argv); + ret = MPI_Comm_size(MPI_COMM_WORLD, &comm_world_nprocs_g); + + if (ret != MPI_SUCCESS) { + HDfprintf(stderr, "%s: MPI_Comm_size call failed\n", progname); + + if (ret == MPI_ERR_COMM) + HDfprintf(stderr, "invalid MPI communicator\n"); + else + HDfprintf(stderr, "invalid argument\n"); + + exit_value = EXIT_FAILURE; + goto finish; + } + + ret = MPI_Comm_rank(MPI_COMM_WORLD, &comm_world_rank_g); + + if (ret != MPI_SUCCESS) { + HDfprintf(stderr, "%s: MPI_Comm_rank call failed\n", progname); + + if (ret == MPI_ERR_COMM) + HDfprintf(stderr, "invalid MPI communicator\n"); + else + HDfprintf(stderr, "invalid argument\n"); + + exit_value = EXIT_FAILURE; + goto finish; + } + + pio_comm_g = MPI_COMM_WORLD; + + h5_set_info_object(); + opts = parse_command_line(argc, argv); + + if (!opts) { + exit_value = EXIT_FAILURE; + goto finish; + } + + if (opts->output_file) { + if ((output = HDfopen(opts->output_file, "w")) == NULL) { + HDfprintf(stderr, "%s: cannot open output file\n", progname); + perror(opts->output_file); + goto finish; + } + } + + if ((pio_debug_level == 0 && comm_world_rank_g == 0) || pio_debug_level > 0) + report_parameters(opts); + + run_test_loop(opts); + +finish: + MPI_Finalize(); + free(opts); + return exit_value; +} + +off_t +squareo(off_t x) +{ + return x * x; +} + +/* + * Function: run_test_loop + * Purpose: Run the I/O tests. Write the results to OUTPUT. + * + * - The slowest changing part of the test is the number of + * processors to use. For each loop iteration, we divide that + * number by 2 and rerun the test. + * + * - The second slowest is what type of IO API to perform. We have + * three choices: POSIXIO, MPI-IO, and PHDF5. + * + * - Then we change the size of the buffer. This information is + * inferred from the number of datasets to create and the number + * of integers to put into each dataset. The backend code figures + * this out. + * + * Return: Nothing + * Programmer: Bill Wendling, 30. October 2001 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static void +run_test_loop(struct options *opts) +{ + parameters parms; + int num_procs; + int doing_pio; /* if this process is doing PIO */ + + parms.num_files = opts->num_files; + parms.num_dsets = opts->num_dsets; + parms.num_iters = opts->num_iters; + parms.blk_size = opts->blk_size; + parms.interleaved = opts->interleaved; + parms.collective = opts->collective; + parms.dim2d = opts->dim2d; + parms.h5_align = (hsize_t)opts->h5_alignment; + parms.h5_thresh = (hsize_t)opts->h5_threshold; + parms.h5_use_chunks = opts->h5_use_chunks; + parms.h5_write_only = opts->h5_write_only; + parms.verify = opts->verify; + + /* start with max_num_procs and decrement it by half for each loop. */ + /* if performance needs restart, fewer processes may be needed. */ + for (num_procs = opts->max_num_procs; num_procs >= opts->min_num_procs; num_procs >>= 1) { + register size_t buf_size; + + parms.num_procs = num_procs; + + if (create_comm_world(parms.num_procs, &doing_pio) != SUCCESS) { + /* do something harsh */ + } + + /* only processes doing PIO will run the tests */ + if (doing_pio) { + output_report("Number of processors = %ld\n", parms.num_procs); + + /* multiply the xfer buffer size by 2 for each loop iteration */ + for (buf_size = opts->min_xfer_size; buf_size <= opts->max_xfer_size; buf_size <<= 1) { + parms.buf_size = buf_size; + + if (parms.dim2d) { + parms.num_bytes = squareo(opts->num_bpp * parms.num_procs); + if (parms.interleaved) + output_report("Transfer Buffer Size: %ldx%ld bytes, File size: %.2f MB\n", buf_size, + opts->blk_size, + ((double)parms.num_dsets * (double)parms.num_bytes) / ONE_MB); + else + output_report("Transfer Buffer Size: %ldx%ld bytes, File size: %.2f MB\n", + opts->blk_size, buf_size, + ((double)parms.num_dsets * (double)parms.num_bytes) / ONE_MB); + + print_indent(1); + output_report(" # of files: %ld, # of datasets: %ld, dataset size: %.2fx%.2f KB\n", + parms.num_files, parms.num_dsets, + (double)(opts->num_bpp * parms.num_procs) / ONE_KB, + (double)(opts->num_bpp * parms.num_procs) / ONE_KB); + } + else { + parms.num_bytes = (off_t)opts->num_bpp * parms.num_procs; + output_report("Transfer Buffer Size: %ld bytes, File size: %.2f MB\n", buf_size, + ((double)parms.num_dsets * (double)parms.num_bytes) / ONE_MB); + + print_indent(1); + output_report(" # of files: %ld, # of datasets: %ld, dataset size: %.2f MB\n", + parms.num_files, parms.num_dsets, + (double)(opts->num_bpp * parms.num_procs) / ONE_MB); + } + + if (opts->io_types & PIO_POSIX) + run_test(POSIXIO, parms, opts); + + if (opts->io_types & PIO_MPI) + run_test(MPIO, parms, opts); + + if (opts->io_types & PIO_HDF5) + run_test(PHDF5, parms, opts); + + /* Run the tests once if buf_size==0, but then break out */ + if (buf_size == 0) + break; + } + + if (destroy_comm_world() != SUCCESS) { + /* do something harsh */ + } + } + } +} + +/* + * Function: run_test + * Purpose: Inner loop call to actually run the I/O test. + * Return: Nothing + * Programmer: Bill Wendling, 18. December 2001 + * Modifications: + */ +static int +run_test(iotype iot, parameters parms, struct options *opts) +{ + results res; + register int i, ret_value = SUCCESS; + int comm_size; + off_t raw_size; + minmax * write_mpi_mm_table = NULL; + minmax * write_mm_table = NULL; + minmax * write_gross_mm_table = NULL; + minmax * write_raw_mm_table = NULL; + minmax * read_mpi_mm_table = NULL; + minmax * read_mm_table = NULL; + minmax * read_gross_mm_table = NULL; + minmax * read_raw_mm_table = NULL; + minmax * read_open_mm_table = NULL; + minmax * read_close_mm_table = NULL; + minmax * write_open_mm_table = NULL; + minmax * write_close_mm_table = NULL; + minmax write_mpi_mm = {0.0, 0.0, 0.0, 0}; + minmax write_mm = {0.0, 0.0, 0.0, 0}; + minmax write_gross_mm = {0.0, 0.0, 0.0, 0}; + minmax write_raw_mm = {0.0, 0.0, 0.0, 0}; + minmax read_mpi_mm = {0.0, 0.0, 0.0, 0}; + minmax read_mm = {0.0, 0.0, 0.0, 0}; + minmax read_gross_mm = {0.0, 0.0, 0.0, 0}; + minmax read_raw_mm = {0.0, 0.0, 0.0, 0}; + minmax read_open_mm = {0.0, 0.0, 0.0, 0}; + minmax read_close_mm = {0.0, 0.0, 0.0, 0}; + minmax write_open_mm = {0.0, 0.0, 0.0, 0}; + minmax write_close_mm = {0.0, 0.0, 0.0, 0}; + + raw_size = parms.num_files * (off_t)parms.num_dsets * (off_t)parms.num_bytes; + parms.io_type = iot; + print_indent(2); + output_report("IO API = "); + + switch (iot) { + case POSIXIO: + output_report("POSIX\n"); + break; + case MPIO: + output_report("MPIO\n"); + break; + case PHDF5: + output_report("PHDF5 (w/MPI-IO driver)\n"); + break; + default: + break; + } + + MPI_Comm_size(pio_comm_g, &comm_size); + + /* allocate space for tables minmax and that it is sufficient */ + /* to initialize all elements to zeros by calloc. */ + write_mpi_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_gross_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_raw_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_open_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + write_close_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + if (!parms.h5_write_only) { + read_mpi_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_gross_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_raw_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_open_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + read_close_mm_table = calloc((size_t)parms.num_iters, sizeof(minmax)); + } + + /* Do IO iteration times, collecting statistics each time */ + for (i = 0; i < parms.num_iters; ++i) { + double t; + + MPI_Barrier(pio_comm_g); + res = do_pio(parms); + + /* gather all of the "mpi write" times */ + t = io_time_get(res.timers, HDF5_MPI_WRITE); + get_minmax(&write_mpi_mm, t); + + write_mpi_mm_table[i] = write_mpi_mm; + + /* gather all of the "write" times */ + t = io_time_get(res.timers, HDF5_FINE_WRITE_FIXED_DIMS); + get_minmax(&write_mm, t); + + write_mm_table[i] = write_mm; + + /* gather all of the "write" times from open to close */ + t = io_time_get(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS); + get_minmax(&write_gross_mm, t); + + write_gross_mm_table[i] = write_gross_mm; + + /* gather all of the raw "write" times */ + t = io_time_get(res.timers, HDF5_RAW_WRITE_FIXED_DIMS); + get_minmax(&write_raw_mm, t); + + write_raw_mm_table[i] = write_raw_mm; + + /* gather all of the file open times (time from open to first write) */ + t = io_time_get(res.timers, HDF5_FILE_WRITE_OPEN); + get_minmax(&write_open_mm, t); + + write_open_mm_table[i] = write_open_mm; + + /* gather all of the file close times (time from last write to close) */ + t = io_time_get(res.timers, HDF5_FILE_WRITE_CLOSE); + get_minmax(&write_close_mm, t); + + write_close_mm_table[i] = write_close_mm; + + if (!parms.h5_write_only) { + /* gather all of the "mpi read" times */ + t = io_time_get(res.timers, HDF5_MPI_READ); + get_minmax(&read_mpi_mm, t); + + read_mpi_mm_table[i] = read_mpi_mm; + + /* gather all of the "read" times */ + t = io_time_get(res.timers, HDF5_FINE_READ_FIXED_DIMS); + get_minmax(&read_mm, t); + + read_mm_table[i] = read_mm; + + /* gather all of the "read" times from open to close */ + t = io_time_get(res.timers, HDF5_GROSS_READ_FIXED_DIMS); + get_minmax(&read_gross_mm, t); + + read_gross_mm_table[i] = read_gross_mm; + + /* gather all of the raw "read" times */ + t = io_time_get(res.timers, HDF5_RAW_READ_FIXED_DIMS); + get_minmax(&read_raw_mm, t); + + read_raw_mm_table[i] = read_raw_mm; + + /* gather all of the file open times (time from open to first read) */ + t = io_time_get(res.timers, HDF5_FILE_READ_OPEN); + get_minmax(&read_open_mm, t); + + read_open_mm_table[i] = read_open_mm; + + /* gather all of the file close times (time from last read to close) */ + t = io_time_get(res.timers, HDF5_FILE_READ_CLOSE); + get_minmax(&read_close_mm, t); + + read_close_mm_table[i] = read_close_mm; + } + + io_time_destroy(res.timers); + } + + /* + * Show various statistics + */ + /* Write statistics */ + /* Print the raw data throughput if desired */ + if (opts->print_raw) { + /* accumulate and output the max, min, and average "raw write" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Write details:\n"); + output_all_info(write_raw_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Raw Data Write", write_raw_mm_table, parms.num_iters, raw_size); + } /* end if */ + + /* show mpi write statics */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("MPI Write details:\n"); + output_all_info(write_mpi_mm_table, parms.num_iters, 4); + } + + /* We don't currently output the MPI write results */ + + /* accumulate and output the max, min, and average "write" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write details:\n"); + output_all_info(write_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Write", write_mm_table, parms.num_iters, raw_size); + + /* accumulate and output the max, min, and average "gross write" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write Open-Close details:\n"); + output_all_info(write_gross_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Write Open-Close", write_gross_mm_table, parms.num_iters, raw_size); + + if (opts->print_times) { + output_times(opts, "Write File Open", write_open_mm_table, parms.num_iters); + output_times(opts, "Write File Close", write_close_mm_table, parms.num_iters); + } + + /* Print out time from open to first write */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write file open details:\n"); + output_all_info(write_open_mm_table, parms.num_iters, 4); + } + + /* Print out time from last write to close */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write file close details:\n"); + output_all_info(write_close_mm_table, parms.num_iters, 4); + } + + if (!parms.h5_write_only) { + /* Read statistics */ + /* Print the raw data throughput if desired */ + if (opts->print_raw) { + /* accumulate and output the max, min, and average "raw read" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Read details:\n"); + output_all_info(read_raw_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Raw Data Read", read_raw_mm_table, parms.num_iters, raw_size); + } /* end if */ + + /* show mpi read statics */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("MPI Read details:\n"); + output_all_info(read_mpi_mm_table, parms.num_iters, 4); + } + + /* We don't currently output the MPI read results */ + + /* accumulate and output the max, min, and average "read" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read details:\n"); + output_all_info(read_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Read", read_mm_table, parms.num_iters, raw_size); + + /* accumulate and output the max, min, and average "gross read" times */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read Open-Close details:\n"); + output_all_info(read_gross_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Read Open-Close", read_gross_mm_table, parms.num_iters, raw_size); + + if (opts->print_times) { + output_times(opts, "Read File Open", read_open_mm_table, parms.num_iters); + output_times(opts, "Read File Close", read_close_mm_table, parms.num_iters); + } + + /* Print out time from open to first read */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read file open details:\n"); + output_all_info(read_open_mm_table, parms.num_iters, 4); + } + + /* Print out time from last read to close */ + if (pio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read file close details:\n"); + output_all_info(read_close_mm_table, parms.num_iters, 4); + } + } + + /* clean up our mess */ + free(write_mpi_mm_table); + free(write_mm_table); + free(write_gross_mm_table); + free(write_raw_mm_table); + free(write_open_mm_table); + free(write_close_mm_table); + + if (!parms.h5_write_only) { + free(read_mpi_mm_table); + free(read_mm_table); + free(read_gross_mm_table); + free(read_raw_mm_table); + free(read_open_mm_table); + free(read_close_mm_table); + } + + return ret_value; +} + +/* + * Function: output_all_info + * Purpose: + * Return: Nothing + * Programmer: Bill Wendling, 29. January 2002 + * Modifications: + */ +static void +output_all_info(minmax *mm, int count, int indent_level) +{ + int i; + + for (i = 0; i < count; ++i) { + print_indent(indent_level); + output_report("Iteration %d:\n", i + 1); + print_indent(indent_level + 1); + output_report("Minimum Time: %.2fs\n", mm[i].min); + print_indent(indent_level + 1); + output_report("Maximum Time: %.2fs\n", mm[i].max); + } +} + +/* + * Function: h5_set_info_object + * Purpose: Process environment variables setting to set up MPI Info + * object. + * Return: 0 if all is fine; otherwise non-zero. + * Programmer: Albert Cheng, 2002/05/21. + * Modifications: + * Bill Wendling, 2002/05/31 + * Modified so that the HDF5_MPI_INFO environment variable can + * be a semicolon separated list of "key=value" pairings. Most + * of the code is to remove any whitespaces which might be + * surrounding the "key=value" pairs. + */ +int +h5_set_info_object(void) +{ + char *envp; /* environment pointer */ + int ret_value = 0; + + /* handle any MPI INFO hints via $HDF5_MPI_INFO */ + if ((envp = HDgetenv("HDF5_MPI_INFO")) != NULL) { + char *next, *valp; + + valp = envp = next = HDstrdup(envp); + + if (!valp) + return 0; + + /* create an INFO object if not created yet */ + if (h5_io_info_g == MPI_INFO_NULL) + MPI_Info_create(&h5_io_info_g); + + do { + size_t len; + char * key_val, *endp, *namep; + + if (*valp == ';') + valp++; + + /* copy key/value pair into temporary buffer */ + len = strcspn(valp, ";"); + next = &valp[len]; + key_val = (char *)HDcalloc(1, len + 1); + + /* increment the next pointer past the terminating semicolon */ + if (*next == ';') + ++next; + + namep = HDstrncpy(key_val, valp, len); + + /* pass up any beginning whitespaces */ + while (*namep && (*namep == ' ' || *namep == '\t')) + namep++; + + if (!*namep) + continue; /* was all white space, so move to next k/v pair */ + + /* eat up any ending white spaces */ + endp = &namep[HDstrlen(namep) - 1]; + + while (endp && (*endp == ' ' || *endp == '\t')) + *endp-- = '\0'; + + /* find the '=' */ + valp = HDstrchr(namep, '='); + + if (valp != NULL) { /* it's a valid key/value pairing */ + char *tmp_val = valp + 1; + + /* change '=' to \0, move valp down one */ + *valp-- = '\0'; + + /* eat up ending whitespace on the "key" part */ + while (*valp == ' ' || *valp == '\t') + *valp-- = '\0'; + + valp = tmp_val; + + /* eat up beginning whitespace on the "value" part */ + while (*valp == ' ' || *valp == '\t') + *valp++ = '\0'; + + /* actually set the darned thing */ + if (MPI_SUCCESS != MPI_Info_set(h5_io_info_g, namep, valp)) { + HDprintf("MPI_Info_set failed\n"); + ret_value = -1; + } + } + + valp = next; + HDfree(key_val); + } while (next && *next); + + HDfree(envp); + } + + return ret_value; +} + +/* + * Function: h5_dump_info_object + * Purpose: Display content of an MPI Info object + * Return: void + * Programmer: Albert Cheng 2002/05/21 + * Modifications: + */ +void +h5_dump_info_object(MPI_Info info) +{ + char key[MPI_MAX_INFO_KEY + 1]; + char value[MPI_MAX_INFO_VAL + 1]; + int flag; + int i, nkeys; + + HDprintf("Dumping MPI Info Object (up to %d bytes per item):\n", MPI_MAX_INFO_VAL); + if (info == MPI_INFO_NULL) { + HDprintf("object is MPI_INFO_NULL\n"); + } + else { + MPI_Info_get_nkeys(info, &nkeys); + HDprintf("object has %d items\n", nkeys); + for (i = 0; i < nkeys; i++) { + MPI_Info_get_nthkey(info, i, key); + MPI_Info_get(info, key, MPI_MAX_INFO_VAL, value, &flag); + HDprintf("%s=%s\n", key, value); + } + } +} + +/* + * Function: get_minmax + * Purpose: Gather all the min, max and total of val. + * Return: Nothing + * Programmer: Bill Wendling, 21. December 2001 + * Modifications: + * Use MPI_Allreduce to do it. -akc, 2002/01/11 + */ +static void +get_minmax(minmax *mm, double val) +{ + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + MPI_Comm_size(pio_comm_g, &mm->num); + + MPI_Allreduce(&val, &mm->max, 1, MPI_DOUBLE, MPI_MAX, pio_comm_g); + MPI_Allreduce(&val, &mm->min, 1, MPI_DOUBLE, MPI_MIN, pio_comm_g); + MPI_Allreduce(&val, &mm->sum, 1, MPI_DOUBLE, MPI_SUM, pio_comm_g); +} + +/* + * Function: accumulate_minmax_stuff + * Purpose: Accumulate the minimum, maximum, and average of the times + * across all processes. + * Return: TOTAL_MM - the total of all of these. + * Programmer: Bill Wendling, 21. December 2001 + * Modifications: + * Changed to use seconds instead of MB/s - QAK, 5/9/02 + */ +static minmax +accumulate_minmax_stuff(minmax *mm, int count) +{ + int i; + minmax total_mm; + + total_mm.sum = 0.0f; + total_mm.max = -DBL_MAX; + total_mm.min = DBL_MAX; + total_mm.num = count; + + for (i = 0; i < count; ++i) { + double m = mm[i].max; + + total_mm.sum += m; + + if (m < total_mm.min) + total_mm.min = m; + + if (m > total_mm.max) + total_mm.max = m; + } + + return total_mm; +} + +/* + * Function: create_comm_world + * Purpose: Create an MPI Comm world and store it in pio_comm_g, which + * is a global variable. + * Return: SUCCESS on success. + * FAIL otherwise. + * Programmer: Bill Wendling, 19. December 2001 + * Modifications: + */ +static int +create_comm_world(int num_procs, int *doing_pio) +{ + /* MPI variables */ + int mrc; /* return values */ + int color; /* for communicator creation */ + int myrank, nprocs; + + pio_comm_g = MPI_COMM_NULL; + + /* + * Create a sub communicator for this PIO run. Easier to use the first N + * processes. + */ + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + + if (num_procs > nprocs) { + HDfprintf(stderr, "number of process(%d) must be <= number of processes in MPI_COMM_WORLD(%d)\n", + num_procs, nprocs); + goto error_done; + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myrank); + color = (myrank < num_procs); + mrc = MPI_Comm_split(MPI_COMM_WORLD, color, myrank, &pio_comm_g); + + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI_Comm_split failed\n"); + goto error_done; + } + + if (!color) { + /* not involved in this run */ + mrc = destroy_comm_world(); + goto done; + } + + /* determine the MPI rank in the PIO communicator */ + MPI_Comm_size(pio_comm_g, &pio_mpi_nprocs_g); + MPI_Comm_rank(pio_comm_g, &pio_mpi_rank_g); + +done: + *doing_pio = color; + return SUCCESS; + +error_done: + destroy_comm_world(); + return FAIL; +} + +/* + * Function: destroy_comm_world + * Purpose: Destroy the created MPI Comm world which is stored in the + * pio_comm_g global variable. + * Return: SUCCESS on success. + * FAIL otherwise. + * Programmer: Bill Wendling, 19. December 2001 + * Modifications: + */ +static int +destroy_comm_world(void) +{ + int mrc = SUCCESS; /* return code */ + + /* release MPI resources */ + if (pio_comm_g != MPI_COMM_NULL) + mrc = (MPI_Comm_free(&pio_comm_g) == MPI_SUCCESS ? SUCCESS : FAIL); + + return mrc; +} + +/* + * Function: output_results + * Purpose: Print information about the time & bandwidth for a given + * minmax & # of iterations. + * Return: Nothing + * Programmer: Quincey Koziol, 9. May 2002 + * Modifications: + */ +static void +output_results(const struct options *opts, const char *name, minmax *table, int table_size, off_t data_size) +{ + minmax total_mm; + + total_mm = accumulate_minmax_stuff(table, table_size); + + print_indent(3); + output_report("%s (%d iteration(s)):\n", name, table_size); + + /* Note: The maximum throughput uses the minimum amount of time & vice versa */ + + print_indent(4); + output_report("Maximum Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.min)); + if (opts->print_times) + output_report(" (%7.3f s)\n", total_mm.min); + else + output_report("\n"); + + print_indent(4); + output_report("Average Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.sum / total_mm.num)); + if (opts->print_times) + output_report(" (%7.3f s)\n", (total_mm.sum / total_mm.num)); + else + output_report("\n"); + + print_indent(4); + output_report("Minimum Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.max)); + if (opts->print_times) + output_report(" (%7.3f s)\n", total_mm.max); + else + output_report("\n"); +} + +static void +output_times(const struct options *opts, const char *name, minmax *table, int table_size) +{ + minmax total_mm; + + total_mm = accumulate_minmax_stuff(table, table_size); + + print_indent(3); + output_report("%s (%d iteration(s)):\n", name, table_size); + + /* Note: The maximum throughput uses the minimum amount of time & vice versa */ + + print_indent(4); + output_report("Minimum Accumulated Time using %d file(s): %7.5f s\n", opts->num_files, (total_mm.min)); + + print_indent(4); + output_report("Average Accumulated Time using %d file(s): %7.5f s\n", opts->num_files, + (total_mm.sum / total_mm.num)); + + print_indent(4); + output_report("Maximum Accumulated Time using %d file(s): %7.5f s\n", opts->num_files, (total_mm.max)); +} + +/* + * Function: output_report + * Purpose: Print a line of the report. Only do so if I'm the 0 process. + * Return: Nothing + * Programmer: Bill Wendling, 19. December 2001 + * Modifications: + */ +static void +output_report(const char *fmt, ...) +{ + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + if (myrank == 0) { + va_list ap; + + HDva_start(ap, fmt); + HDvfprintf(output, fmt, ap); + HDva_end(ap); + } +} + +/* + * Function: print_indent + * Purpose: Print spaces to indent a new line of text for pretty printing + * things. + * Return: Nothing + * Programmer: Bill Wendling, 29. October 2001 + * Modifications: + */ +static void +print_indent(register int indent) +{ + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + if (myrank == 0) { + indent *= TAB_SPACE; + + for (; indent > 0; --indent) + HDfputc(' ', output); + } +} + +static void +recover_size_and_print(long long val, const char *end) +{ + if (val >= ONE_KB && (val % ONE_KB) == 0) { + if (val >= ONE_MB && (val % ONE_MB) == 0) { + if (val >= ONE_GB && (val % ONE_GB) == 0) + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "GB%s", + val / ONE_GB, end); + else + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "MB%s", + val / ONE_MB, end); + } + else { + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "KB%s", + val / ONE_KB, end); + } + } + else { + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "%s", + val, end); + } +} + +static void +print_io_api(long io_types) +{ + if (io_types & PIO_POSIX) + HDfprintf(output, "posix "); + if (io_types & PIO_MPI) + HDfprintf(output, "mpiio "); + if (io_types & PIO_HDF5) + HDfprintf(output, "phdf5 "); + HDfprintf(output, "\n"); +} + +static void +report_parameters(struct options *opts) +{ + int rank = comm_world_rank_g; + + print_version("HDF5 Library"); /* print library version */ + HDfprintf(output, "rank %d: ==== Parameters ====\n", rank); + + HDfprintf(output, "rank %d: IO API=", rank); + print_io_api(opts->io_types); + + HDfprintf(output, "rank %d: Number of files=%ld\n", rank, opts->num_files); + HDfprintf(output, "rank %d: Number of datasets=%ld\n", rank, opts->num_dsets); + HDfprintf(output, "rank %d: Number of iterations=%d\n", rank, opts->num_iters); + HDfprintf(output, "rank %d: Number of processes=%d:%d\n", rank, opts->min_num_procs, opts->max_num_procs); + + if (opts->dim2d) { + HDfprintf(output, "rank %d: Number of bytes per process per dataset=", rank); + recover_size_and_print((long long)(opts->num_bpp * opts->num_bpp * opts->min_num_procs), ":"); + recover_size_and_print((long long)(opts->num_bpp * opts->num_bpp * opts->max_num_procs), "\n"); + + HDfprintf(output, "rank %d: Size of dataset(s)=", rank); + recover_size_and_print((long long)(opts->num_bpp * opts->min_num_procs), "x"); + recover_size_and_print((long long)(opts->num_bpp * opts->min_num_procs), ":"); + recover_size_and_print((long long)(opts->num_bpp * opts->max_num_procs), "x"); + recover_size_and_print((long long)(opts->num_bpp * opts->max_num_procs), "\n"); + + HDfprintf(output, "rank %d: File size=", rank); + recover_size_and_print((long long)(squareo(opts->num_bpp * opts->min_num_procs) * opts->num_dsets), + ":"); + recover_size_and_print((long long)(squareo(opts->num_bpp * opts->max_num_procs) * opts->num_dsets), + "\n"); + + HDfprintf(output, "rank %d: Transfer buffer size=", rank); + if (opts->interleaved) { + recover_size_and_print((long long)opts->min_xfer_size, "x"); + recover_size_and_print((long long)opts->blk_size, ":"); + recover_size_and_print((long long)opts->max_xfer_size, "x"); + recover_size_and_print((long long)opts->blk_size, "\n"); + } + else { + recover_size_and_print((long long)opts->blk_size, "x"); + recover_size_and_print((long long)opts->min_xfer_size, ":"); + recover_size_and_print((long long)opts->blk_size, "x"); + recover_size_and_print((long long)opts->max_xfer_size, "\n"); + } + HDfprintf(output, "rank %d: Block size=", rank); + recover_size_and_print((long long)opts->blk_size, "x"); + recover_size_and_print((long long)opts->blk_size, "\n"); + } + else { + HDfprintf(output, "rank %d: Number of bytes per process per dataset=", rank); + recover_size_and_print((long long)opts->num_bpp, "\n"); + + HDfprintf(output, "rank %d: Size of dataset(s)=", rank); + recover_size_and_print((long long)(opts->num_bpp * opts->min_num_procs), ":"); + recover_size_and_print((long long)(opts->num_bpp * opts->max_num_procs), "\n"); + + HDfprintf(output, "rank %d: File size=", rank); + recover_size_and_print((long long)(opts->num_bpp * opts->min_num_procs * opts->num_dsets), ":"); + recover_size_and_print((long long)(opts->num_bpp * opts->max_num_procs * opts->num_dsets), "\n"); + + HDfprintf(output, "rank %d: Transfer buffer size=", rank); + recover_size_and_print((long long)opts->min_xfer_size, ":"); + recover_size_and_print((long long)opts->max_xfer_size, "\n"); + HDfprintf(output, "rank %d: Block size=", rank); + recover_size_and_print((long long)opts->blk_size, "\n"); + } + + HDfprintf(output, "rank %d: Block Pattern in Dataset=", rank); + if (opts->interleaved) + HDfprintf(output, "Interleaved\n"); + else + HDfprintf(output, "Contiguous\n"); + + HDfprintf(output, "rank %d: I/O Method for MPI and HDF5=", rank); + if (opts->collective) + HDfprintf(output, "Collective\n"); + else + HDfprintf(output, "Independent\n"); + + HDfprintf(output, "rank %d: Geometry=", rank); + if (opts->dim2d) + HDfprintf(output, "2D\n"); + else + HDfprintf(output, "1D\n"); + + HDfprintf(output, "rank %d: VFL used for HDF5 I/O=%s\n", rank, "MPI-IO driver"); + + HDfprintf(output, "rank %d: Data storage method in HDF5=", rank); + if (opts->h5_use_chunks) + HDfprintf(output, "Chunked\n"); + else + HDfprintf(output, "Contiguous\n"); + + { + char *prefix = HDgetenv("HDF5_PARAPREFIX"); + + HDfprintf(output, "rank %d: Env HDF5_PARAPREFIX=%s\n", rank, (prefix ? prefix : "not set")); + } + + HDfprintf(output, "rank %d: ", rank); + h5_dump_info_object(h5_io_info_g); + + HDfprintf(output, "rank %d: ==== End of Parameters ====\n", rank); + HDfprintf(output, "\n"); +} + +/* + * Function: parse_command_line + * Purpose: Parse the command line options and return a STRUCT OPTIONS + * structure which will need to be freed by the calling function. + * Return: Pointer to an OPTIONS structure + * Programmer: Bill Wendling, 31. October 2001 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static struct options * +parse_command_line(int argc, char *argv[]) +{ + register int opt; + struct options *cl_opts; + + cl_opts = (struct options *)malloc(sizeof(struct options)); + + cl_opts->output_file = NULL; + cl_opts->io_types = 0; /* will set default after parsing options */ + cl_opts->num_dsets = 1; + cl_opts->num_files = 1; + cl_opts->num_bpp = 0; + cl_opts->num_iters = 1; + cl_opts->max_num_procs = comm_world_nprocs_g; + cl_opts->min_num_procs = 1; + cl_opts->max_xfer_size = 0; + cl_opts->min_xfer_size = 0; + cl_opts->blk_size = 0; + cl_opts->interleaved = 0; /* Default to contiguous blocks in dataset */ + cl_opts->collective = 0; /* Default to independent I/O access */ + cl_opts->dim2d = 0; /* Default to 1D */ + cl_opts->print_times = FALSE; /* Printing times is off by default */ + cl_opts->print_raw = FALSE; /* Printing raw data throughput is off by default */ + cl_opts->h5_alignment = 1; /* No alignment for HDF5 objects by default */ + cl_opts->h5_threshold = 1; /* No threshold for aligning HDF5 objects by default */ + cl_opts->h5_use_chunks = FALSE; /* Don't chunk the HDF5 dataset by default */ + cl_opts->h5_write_only = FALSE; /* Do both read and write by default */ + cl_opts->verify = FALSE; /* No Verify data correctness by default */ + + while ((opt = H5_get_option(argc, (const char **)argv, s_opts, l_opts)) != EOF) { + switch ((char)opt) { + case 'a': + cl_opts->h5_alignment = parse_size_directive(H5_optarg); + break; + case 'A': { + const char *end = H5_optarg; + + while (end && *end != '\0') { + char buf[10]; + int i; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (isalnum(*end) && i < 10) + buf[i++] = *end; + + if (!HDstrcasecmp(buf, "phdf5")) { + cl_opts->io_types |= PIO_HDF5; + } + else if (!HDstrcasecmp(buf, "mpiio")) { + cl_opts->io_types |= PIO_MPI; + } + else if (!HDstrcasecmp(buf, "posix")) { + cl_opts->io_types |= PIO_POSIX; + } + else { + HDfprintf(stderr, "pio_perf: invalid --api option %s\n", buf); + HDexit(EXIT_FAILURE); + } + + if (*end == '\0') + break; + + end++; + } + } + + break; +#if 0 + case 'b': + /* the future "binary" option */ + break; +#endif /* 0 */ + case 'B': + cl_opts->blk_size = (size_t)parse_size_directive(H5_optarg); + break; + case 'c': + /* Turn on chunked HDF5 dataset creation */ + cl_opts->h5_use_chunks = TRUE; + break; + case 'C': + cl_opts->collective = 1; + break; + case 'd': + cl_opts->num_dsets = atoi(H5_optarg); + break; + case 'D': { + const char *end = H5_optarg; + + while (end && *end != '\0') { + char buf[10]; + int i; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + if (HDstrlen(buf) > 1 || HDisdigit(buf[0])) { + size_t j; + + for (j = 0; j < 10 && buf[j] != '\0'; ++j) + if (!isdigit(buf[j])) { + HDfprintf(stderr, "pio_perf: invalid --debug option %s\n", buf); + HDexit(EXIT_FAILURE); + } + + pio_debug_level = atoi(buf); + + if (pio_debug_level > 4) + pio_debug_level = 4; + else if (pio_debug_level < 0) + pio_debug_level = 0; + } + else { + switch (*buf) { + case 'r': + /* Turn on raw data throughput info */ + cl_opts->print_raw = TRUE; + break; + case 't': + /* Turn on time printing */ + cl_opts->print_times = TRUE; + break; + case 'v': + /* Turn on verify data correctness*/ + cl_opts->verify = TRUE; + break; + default: + HDfprintf(stderr, "pio_perf: invalid --debug option %s\n", buf); + HDexit(EXIT_FAILURE); + } + } + + if (*end == '\0') + break; + + end++; + } + } + + break; + case 'e': + cl_opts->num_bpp = parse_size_directive(H5_optarg); + break; + case 'F': + cl_opts->num_files = HDatoi(H5_optarg); + break; + case 'g': + cl_opts->dim2d = 1; + break; + case 'i': + cl_opts->num_iters = HDatoi(H5_optarg); + break; + case 'I': + cl_opts->interleaved = 1; + break; + case 'o': + cl_opts->output_file = H5_optarg; + break; + case 'p': + cl_opts->min_num_procs = HDatoi(H5_optarg); + break; + case 'P': + cl_opts->max_num_procs = HDatoi(H5_optarg); + break; + case 'T': + cl_opts->h5_threshold = parse_size_directive(H5_optarg); + break; + case 'w': + cl_opts->h5_write_only = TRUE; + break; + case 'x': + cl_opts->min_xfer_size = (size_t)parse_size_directive(H5_optarg); + break; + case 'X': + cl_opts->max_xfer_size = (size_t)parse_size_directive(H5_optarg); + break; + case 'h': + case '?': + default: + usage(progname); + HDfree(cl_opts); + return NULL; + } + } + + if (cl_opts->num_bpp == 0) { + if (cl_opts->dim2d == 0) + cl_opts->num_bpp = 256 * ONE_KB; + else + cl_opts->num_bpp = 8 * ONE_KB; + } + + if (cl_opts->max_xfer_size == 0) + cl_opts->max_xfer_size = (size_t)cl_opts->num_bpp; + + if (cl_opts->min_xfer_size == 0) + cl_opts->min_xfer_size = (size_t)(cl_opts->num_bpp) / 2; + + if (cl_opts->blk_size == 0) + cl_opts->blk_size = (size_t)(cl_opts->num_bpp) / 2; + + /* set default if none specified yet */ + if (!cl_opts->io_types) + cl_opts->io_types = PIO_HDF5 | PIO_MPI | PIO_POSIX; /* run all API */ + + /* verify parameters sanity. Adjust if needed. */ + /* cap xfer_size with bytes per process */ + if (!cl_opts->dim2d) { + if (cl_opts->min_xfer_size > (size_t)cl_opts->num_bpp) + cl_opts->min_xfer_size = (size_t)cl_opts->num_bpp; + if (cl_opts->max_xfer_size > (size_t)cl_opts->num_bpp) + cl_opts->max_xfer_size = (size_t)cl_opts->num_bpp; + } + if (cl_opts->min_xfer_size > cl_opts->max_xfer_size) + cl_opts->min_xfer_size = cl_opts->max_xfer_size; + if (cl_opts->blk_size > (size_t)cl_opts->num_bpp) + cl_opts->blk_size = (size_t)cl_opts->num_bpp; + /* check range of number of processes */ + if (cl_opts->min_num_procs <= 0) + cl_opts->min_num_procs = 1; + if (cl_opts->max_num_procs <= 0) + cl_opts->max_num_procs = 1; + if (cl_opts->min_num_procs > cl_opts->max_num_procs) + cl_opts->min_num_procs = cl_opts->max_num_procs; + /* check iteration */ + if (cl_opts->num_iters <= 0) + cl_opts->num_iters = 1; + + return cl_opts; +} + +/* + * Function: parse_size_directive + * Purpose: Parse the size directive passed on the commandline. The size + * directive is an integer followed by a size indicator: + * + * K, k - Kilobyte + * M, m - Megabyte + * G, g - Gigabyte + * + * Return: The size as a off_t because this is related to file size. + * If an unknown size indicator is used, then the program will + * exit with EXIT_FAILURE as the return value. + * Programmer: Bill Wendling, 18. December 2001 + * Modifications: + */ +static off_t +parse_size_directive(const char *size) +{ + off_t s; + char *endptr; + + s = HDstrtol(size, &endptr, 10); + + if (endptr && *endptr) { + while (*endptr != '\0' && (*endptr == ' ' || *endptr == '\t')) + ++endptr; + + switch (*endptr) { + case 'K': + case 'k': + s *= ONE_KB; + break; + case 'M': + case 'm': + s *= ONE_MB; + break; + case 'G': + case 'g': + s *= ONE_GB; + break; + default: + HDfprintf(stderr, "Illegal size specifier '%c'\n", *endptr); + HDexit(EXIT_FAILURE); + } + } + + return s; +} + +/* + * Function: usage + * Purpose: Print a usage message and then exit. + * Return: Nothing + * Programmer: Bill Wendling, 31. October 2001 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static void +usage(const char *prog) +{ + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + if (myrank == 0) { + print_version(prog); + HDprintf("usage: %s [OPTIONS]\n", prog); + HDprintf(" OPTIONS\n"); + HDprintf(" -h, --help Print a usage message and exit\n"); + HDprintf(" -a S, --align=S Alignment of objects in HDF5 file [default: 1]\n"); + HDprintf(" -A AL, --api=AL Which APIs to test [default: all of them]\n"); +#if 0 + HDprintf(" -b, --binary The elusive binary option\n"); +#endif /* 0 */ + HDprintf(" -B S, --block-size=S Block size within transfer buffer\n"); + HDprintf(" (see below for description)\n"); + HDprintf(" [default: half the number of bytes per process\n"); + HDprintf(" per dataset]\n"); + HDprintf(" -c, --chunk Create HDF5 datasets using chunked storage\n"); + HDprintf(" [default: contiguous storage]\n"); + HDprintf(" -C, --collective Use collective I/O for MPI and HDF5 APIs\n"); + HDprintf(" [default: independent I/O)\n"); + HDprintf(" -d N, --num-dsets=N Number of datasets per file [default: 1]\n"); + HDprintf(" -D DL, --debug=DL Indicate the debugging level\n"); + HDprintf(" [default: no debugging]\n"); + HDprintf(" -e S, --num-bytes=S Number of bytes per process per dataset\n"); + HDprintf(" (see below for description)\n"); + HDprintf(" [default: 256K for 1D, 8K for 2D]\n"); + HDprintf(" -F N, --num-files=N Number of files [default: 1]\n"); + HDprintf(" -g, --geometry Use 2D geometry [default: 1D geometry]\n"); + HDprintf(" -i N, --num-iterations=N Number of iterations to perform [default: 1]\n"); + HDprintf(" -I, --interleaved Interleaved access pattern\n"); + HDprintf(" (see below for example)\n"); + HDprintf(" [default: Contiguous access pattern]\n"); + HDprintf(" -o F, --output=F Output raw data into file F [default: none]\n"); + HDprintf(" -p N, --min-num-processes=N Minimum number of processes to use [default: 1]\n"); + HDprintf(" -P N, --max-num-processes=N Maximum number of processes to use\n"); + HDprintf(" [default: all MPI_COMM_WORLD processes ]\n"); + HDprintf(" -T S, --threshold=S Threshold for alignment of objects in HDF5 file\n"); + HDprintf(" [default: 1]\n"); + HDprintf(" -w, --write-only Perform write tests not the read tests\n"); + HDprintf(" -x S, --min-xfer-size=S Minimum transfer buffer size\n"); + HDprintf(" (see below for description)\n"); + HDprintf(" [default: half the number of bytes per process\n"); + HDprintf(" per dataset]\n"); + HDprintf(" -X S, --max-xfer-size=S Maximum transfer buffer size\n"); + HDprintf(" [default: the number of bytes per process per\n"); + HDprintf(" dataset]\n"); + HDprintf("\n"); + HDprintf(" F - is a filename.\n"); + HDprintf(" N - is an integer >=0.\n"); + HDprintf(" S - is a size specifier, an integer >=0 followed by a size indicator:\n"); + HDprintf(" K - Kilobyte (%d)\n", ONE_KB); + HDprintf(" M - Megabyte (%d)\n", ONE_MB); + HDprintf(" G - Gigabyte (%d)\n", ONE_GB); + HDprintf("\n"); + HDprintf(" Example: '37M' is 37 megabytes or %d bytes\n", 37 * ONE_MB); + HDprintf("\n"); + HDprintf(" AL - is an API list. Valid values are:\n"); + HDprintf(" phdf5 - Parallel HDF5\n"); + HDprintf(" mpiio - MPI-I/O\n"); + HDprintf(" posix - POSIX\n"); + HDprintf("\n"); + HDprintf(" Example: --api=mpiio,phdf5\n"); + HDprintf("\n"); + HDprintf(" Dataset size:\n"); + HDprintf(" Depending on the selected geometry, each test dataset is either a linear\n"); + HDprintf(" array of size bytes-per-process * num-processes, or a square array of size\n"); + HDprintf(" (bytes-per-process * num-processes) x (bytes-per-process * num-processes).\n"); + HDprintf("\n"); + HDprintf(" Block size vs. Transfer buffer size:\n"); + HDprintf(" buffer-size controls the size of the memory buffer, which is broken into\n"); + HDprintf(" blocks and written to the file. Depending on the selected geometry, each\n"); + HDprintf(" block can be a linear array of size block-size or a square array of size\n"); + HDprintf(" block-size x block-size. The arrangement in which blocks are written is\n"); + HDprintf(" determined by the access pattern.\n"); + HDprintf("\n"); + HDprintf(" In 1D geometry, the transfer buffer is a linear array of size buffer-size.\n"); + HDprintf(" In 2D geometry, it is a rectangular array of size block-size x buffer-size\n"); + HDprintf(" or buffer-size x block-size if interleaved pattern is selected.\n"); + HDprintf("\n"); + HDprintf(" Interleaved and Contiguous patterns in 1D geometry:\n"); + HDprintf(" When contiguous access pattern is chosen, the dataset is evenly divided\n"); + HDprintf(" into num-processes regions and each process writes data to its own region.\n"); + HDprintf(" When interleaved blocks are written to a dataset, space for the first\n"); + HDprintf(" block of the first process is allocated in the dataset, then space is\n"); + HDprintf(" allocated for the first block of the second process, etc. until space is\n"); + HDprintf(" allocated for the first block of each process, then space is allocated for\n"); + HDprintf(" the second block of the first process, the second block of the second\n"); + HDprintf(" process, etc.\n"); + HDprintf("\n"); + HDprintf(" For example, with a 3 process run, 512KB bytes-per-process, 256KB transfer\n"); + HDprintf(" buffer size, and 64KB block size, each process must issue 2 transfer\n"); + HDprintf(" requests to complete access to the dataset.\n"); + HDprintf(" Contiguous blocks of the first transfer request are written like so:\n"); + HDprintf(" 1111----2222----3333----\n"); + HDprintf(" Interleaved blocks of the first transfer request are written like so:\n"); + HDprintf(" 123123123123------------\n"); + HDprintf(" The actual number of I/O operations involved in a transfer request\n"); + HDprintf(" depends on the access pattern and communication mode.\n"); + HDprintf(" When using independent I/O with interleaved pattern, each process\n"); + HDprintf(" performs 4 small non-contiguous I/O operations per transfer request.\n"); + HDprintf(" If collective I/O is turned on, the combined content of the buffers of\n"); + HDprintf(" the 3 processes will be written using one collective I/O operation\n"); + HDprintf(" per transfer request.\n"); + HDprintf("\n"); + HDprintf(" For information about access patterns in 2D geometry, please refer to the\n"); + HDprintf(" HDF5 Reference Manual.\n"); + HDprintf("\n"); + HDprintf(" DL - is a list of debugging flags. Valid values are:\n"); + HDprintf(" 1 - Minimal\n"); + HDprintf(" 2 - Not quite everything\n"); + HDprintf(" 3 - Everything\n"); + HDprintf(" 4 - The kitchen sink\n"); + HDprintf(" r - Raw data I/O throughput information\n"); + HDprintf(" t - Times as well as throughputs\n"); + HDprintf(" v - Verify data correctness\n"); + HDprintf("\n"); + HDprintf(" Example: --debug=2,r,t\n"); + HDprintf("\n"); + HDprintf(" Environment variables:\n"); + HDprintf(" HDF5_NOCLEANUP Do not remove data files if set [default remove]\n"); + HDprintf(" HDF5_MPI_INFO MPI INFO object key=value separated by ;\n"); + HDprintf(" HDF5_PARAPREFIX Paralllel data files prefix\n"); + fflush(stdout); + } /* end if */ +} /* end usage() */ + +#else /* H5_HAVE_PARALLEL */ + +/* + * Function: main + * Purpose: Dummy main() function for if HDF5 was configured without + * parallel stuff. + * Return: EXIT_SUCCESS + * Programmer: Bill Wendling, 14. November 2001 + */ +int +main(void) +{ + HDprintf("No parallel IO performance because parallel is not configured\n"); + return EXIT_SUCCESS; +} /* end main */ + +#endif /* !H5_HAVE_PARALLEL */ diff --git a/tools/src/h5perf/pio_perf.h b/tools/src/h5perf/pio_perf.h new file mode 100644 index 0000000..8924c20 --- /dev/null +++ b/tools/src/h5perf/pio_perf.h @@ -0,0 +1,109 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef PIO_PERF_H +#define PIO_PERF_H + +#ifndef STANDALONE +#include "io_timer.h" +#include "H5private.h" +#include "h5tools.h" +#include "h5tools_utils.h" +#else +#include "io_timer.h" +#include "pio_standalone.h" +#endif + +#ifdef H5_HAVE_PARALLEL +extern MPI_Info h5_io_info_g; /* MPI INFO object for IO */ +#endif + +#ifdef H5_HAVE_PARALLEL +int h5_set_info_object(void); +void h5_dump_info_object(MPI_Info info); +#endif + +/* setup the dataset no fill option if this is v1.5 or more */ +#if H5_VERS_MAJOR > 1 || H5_VERS_MINOR > 4 +#define H5_HAVE_NOFILL 1 +#endif + +typedef enum iotype_ { + POSIXIO, + MPIO, + PHDF5 + /*NUM_TYPES*/ +} iotype; + +typedef struct parameters_ { + iotype io_type; /* The type of IO test to perform */ + int num_procs; /* Maximum number of processes to use */ + long num_files; /* Number of files to create */ + long num_dsets; /* Number of datasets to create */ + off_t num_bytes; /* Number of bytes in each dset */ + int num_iters; /* Number of times to loop doing the IO */ + size_t buf_size; /* Buffer size */ + size_t blk_size; /* Block size */ + unsigned interleaved; /* Interleaved vs. contiguous blocks */ + unsigned collective; /* Collective vs. independent I/O */ + unsigned dim2d; /* 1D vs. 2D */ + hsize_t h5_align; /* HDF5 object alignment */ + hsize_t h5_thresh; /* HDF5 object alignment threshold */ + int h5_use_chunks; /* Make HDF5 dataset chunked */ + int h5_write_only; /* Perform the write tests only */ + int verify; /* Verify data correctness */ +} parameters; + +typedef struct results_ { + herr_t ret_code; + io_time_t *timers; +} results; + +#ifndef SUCCESS +#define SUCCESS 0 +#endif /* !SUCCESS */ + +#ifndef FAIL +#define FAIL -1 +#endif /* !FAIL */ + +extern FILE * output; /* output file */ +extern io_time_t *timer_g; /* timer: global for stub functions */ +extern int comm_world_rank_g; /* my rank in MPI_COMM_RANK */ +extern int comm_world_nprocs_g; /* num. of processes of MPI_COMM_WORLD */ +extern MPI_Comm pio_comm_g; /* Communicator to run the PIO */ +extern int pio_mpi_rank_g; /* MPI rank of pio_comm_g */ +extern int pio_mpi_nprocs_g; /* number of processes of pio_comm_g */ +extern int pio_debug_level; /* The debug level: + * 0 - Off + * 1 - Minimal + * 2 - Some more + * 3 - Maximal + * 4 - Even More Debugging (timer stuff) + */ + +#define HDprint_rank(f) /* print rank in MPI_COMM_WORLD */ HDfprintf(f, "%d: ", comm_world_rank_g); +#define HDprint_size(f) /* print size of MPI_COMM_WORLD */ HDfprintf(f, "%d", comm_world_nprocs_g); +#define HDprint_rank_size(f) /* print rank/size of MPI_COMM_WORLD */ \ + HDfprintf(f, "%d/%d: ", comm_world_rank_g, comm_world_nprocs_g); + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +extern results do_pio(parameters param); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* PIO_PERF_H */ diff --git a/tools/src/h5perf/sio_engine.c b/tools/src/h5perf/sio_engine.c new file mode 100644 index 0000000..1af2318 --- /dev/null +++ b/tools/src/h5perf/sio_engine.c @@ -0,0 +1,1328 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Author: Christian Chilan, April 2008 + */ + +#include "hdf5.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef H5_HAVE_UNISTD_H +#include <sys/types.h> +#include <unistd.h> +#endif + +#ifdef H5_HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +#include "sio_perf.h" + +/* Macro definitions */ + +/* sizes of various items. these sizes won't change during program execution */ +#define ELMT_H5_TYPE H5T_NATIVE_UCHAR + +#define GOTOERROR(errcode) \ + { \ + ret_code = errcode; \ + goto done; \ + } +#define ERRMSG(mesg) \ + { \ + HDfprintf(stderr, "*** Assertion failed (%s) at line %4d in %s\n", mesg, (int)__LINE__, __FILE__); \ + } + +/* verify: if val is false (0), print mesg. */ +#define VRFY(val, mesg) \ + do { \ + if (!val) { \ + ERRMSG(mesg); \ + GOTOERROR(FAIL); \ + } \ + } while (0) + +/* POSIX I/O macros */ +#ifdef H5_HAVE_WIN32_API +/* Can't link against the library, so this test will use the older, non-Unicode + * _open() call on Windows. + */ +#define HDopen(S, F, ...) _open(S, F | _O_BINARY, __VA_ARGS__) +#endif /* H5_HAVE_WIN32_API */ +#define POSIXCREATE(fn) HDopen(fn, O_CREAT | O_TRUNC | O_RDWR, 0600) +#define POSIXOPEN(fn, F) HDopen(fn, F, 0600) +#define POSIXCLOSE(F) HDclose(F) +#define POSIXSEEK(F, L) HDlseek(F, L, SEEK_SET) +#define POSIXWRITE(F, B, S) HDwrite(F, B, S) +#define POSIXREAD(F, B, S) HDread(F, B, S) + +enum { SIO_CREATE = 1, SIO_WRITE = 2, SIO_READ = 4 }; + +/* Global variables */ +static int clean_file_g = -1; /*whether to cleanup temporary test */ +/*files. -1 is not defined; */ +/*0 is no cleanup; 1 is do cleanup */ + +/* the different types of file descriptors we can expect */ +typedef union { + int posixfd; /* POSIX file handle*/ + hid_t h5fd; /* HDF5 file */ +} file_descr; + +/* local functions */ +static char * sio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size, + parameters *param); +static herr_t do_write(results *res, file_descr *fd, parameters *parms, void *buffer); +static herr_t do_read(results *res, file_descr *fd, parameters *parms, void *buffer); +static herr_t dset_write(int local_dim, file_descr *fd, parameters *parms, void *buffer); +static herr_t posix_buffer_write(int local_dim, file_descr *fd, parameters *parms, void *buffer); +static herr_t dset_read(int localrank, file_descr *fd, parameters *parms, void *buffer, const char *buffer2); +static herr_t posix_buffer_read(int local_dim, file_descr *fd, parameters *parms, void *buffer); +static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); +hid_t set_vfd(parameters *param); +static herr_t do_fclose(iotype iot, file_descr *fd); +static void do_cleanupfile(iotype iot, char *fname); + +/* global variables */ +static HDoff_t offset[MAX_DIMS]; /* dataset size in bytes */ +static size_t buf_offset[MAX_DIMS]; /* dataset size in bytes */ +static int order[MAX_DIMS]; /* dimension access order */ +static size_t linear_buf_size; /* linear buffer size */ +static int cont_dim; /* lowest dimension for contiguous POSIX + access */ +static size_t cont_size; /* size of contiguous POSIX access */ +static hid_t fapl; /* file access list */ +static unsigned char *buf_p; /* buffer pointer */ +static const char * multi_letters = "msbrglo"; /* string for multi driver */ + +/* HDF5 global variables */ +static hsize_t h5count[MAX_DIMS]; /*selection count */ +static hssize_t h5offset[MAX_DIMS]; /* Selection offset within dataspace */ +static hid_t h5dset_space_id = H5I_INVALID_HID; /*dataset space ID */ +static hid_t h5mem_space_id = H5I_INVALID_HID; /*memory dataspace ID */ +static hid_t h5ds_id = H5I_INVALID_HID; /*dataset handle */ +static hid_t h5dcpl = H5I_INVALID_HID; /* Dataset creation property list */ +static hid_t h5dxpl = H5I_INVALID_HID; /* Dataset transfer property list */ + +/* + * Function: do_sio + * Purpose: SIO Engine where IO are executed. + * Return: results + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ +void +do_sio(parameters param, results *res) +{ + char * buffer = NULL; /*data buffer pointer */ + size_t buf_size[MAX_DIMS]; /* general buffer size in bytes */ + file_descr fd; /* file handles */ + iotype iot; /* API type */ + char base_name[256]; /* test file base name */ + /* return codes */ + herr_t ret_code = 0; /*return code */ + + char fname[FILENAME_MAX]; /* test file name */ + int i; + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + + /* Sanity check parameters */ + + /* IO type */ + iot = param.io_type; + + switch (iot) { + case POSIXIO: + fd.posixfd = -1; + res->timers = io_time_new(SYS_CLOCK); + break; + case HDF5: + fd.h5fd = -1; + res->timers = io_time_new(SYS_CLOCK); + break; + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)iot); + GOTOERROR(FAIL); + } + + linear_buf_size = 1; + + for (i = 0; i < param.rank; i++) { + buf_size[i] = param.buf_size[i]; + order[i] = param.order[i]; + linear_buf_size *= buf_size[i]; + buf_offset[i] = 0; + offset[i] = 0; + + /* Validate transfer buffer size */ + if (param.buf_size[i] <= 0) { + HDfprintf(stderr, "Transfer buffer size[%d] (%zu) must be > 0\n", i, buf_size[i]); + GOTOERROR(FAIL); + } + + if ((param.dset_size[i] % param.buf_size[i]) != 0) { + HDfprintf(stderr, + "Dataset size[%d] (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "trasfer buffer size[%d] (%zu)\n", + param.rank, (long long)param.dset_size[i], param.rank, param.buf_size[i]); + GOTOERROR(FAIL); + } + } + + /* Allocate transfer buffer */ + if ((buffer = (char *)malloc(linear_buf_size)) == NULL) { + HDfprintf(stderr, "malloc for transfer buffer size (%zu) failed\n", linear_buf_size); + GOTOERROR(FAIL); + } + + if (sio_debug_level >= 4) + + /* output all of the times for all iterations */ + HDfprintf(output, "Timer details:\n"); + + /* + * Write performance measurement + */ + /* Open file for write */ + + HDstrcpy(base_name, "#sio_tmp"); + sio_create_filename(iot, base_name, fname, sizeof(fname), ¶m); + + if (sio_debug_level > 0) + HDfprintf(output, "data filename=%s\n", fname); + + io_time_set(res->timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, SIO_CREATE | SIO_WRITE); + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res->timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTART); + hrc = do_write(res, &fd, ¶m, buffer); + io_time_set(res->timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_write failed"); + + /* Close file for write */ + hrc = do_fclose(iot, &fd); + io_time_set(res->timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + + if (!param.h5_write_only) { + /* + * Read performance measurement + */ + + /* Open file for read */ + io_time_set(res->timers, HDF5_GROSS_READ_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, SIO_READ); + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res->timers, HDF5_FINE_READ_FIXED_DIMS, TSTART); + hrc = do_read(res, &fd, ¶m, buffer); + io_time_set(res->timers, HDF5_FINE_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_read failed"); + + /* Close file for read */ + hrc = do_fclose(iot, &fd); + + io_time_set(res->timers, HDF5_GROSS_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + } + + do_cleanupfile(iot, fname); + +done: + /* clean up */ + /* release HDF5 objects */ + + /* close any opened files */ + /* no remove(fname) because that should have happened normally. */ + switch (iot) { + case POSIXIO: + if (fd.posixfd != -1) + hrc = do_fclose(iot, &fd); + break; + case HDF5: + if (fd.h5fd != -1) + hrc = do_fclose(iot, &fd); + break; + default: + /* unknown request */ + HDassert(0 && "Unknown IO type"); + break; + } + + /* release generic resources */ + if (buffer) + free(buffer); + + res->ret_code = ret_code; +} + +/* + * Function: sio_create_filename + * Purpose: Create a new filename to write to. Determine the correct + * suffix to append to the filename by the type of I/O we're + * doing. Also, place in the /tmp/{$USER,$LOGIN} directory if + * USER or LOGIN are specified in the environment. + * Return: Pointer to filename or NULL + * Programmer: Bill Wendling, 21. November 2001 + * Modifications: Support for file drivers. Christian Chilan, April, 2008 + */ +static char * +sio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size, parameters *param) +{ + const char *prefix, *suffix = ""; + char * ptr, last = '\0'; + size_t i, j; + vfdtype vfd; + vfd = param->vfd; + + if (!base_name || !fullname || size < 1) + return NULL; + + memset(fullname, 0, size); + + switch (iot) { + case POSIXIO: + suffix = ".posix"; + break; + case HDF5: + suffix = ".h5"; + if (vfd == family) + suffix = "%05d.h5"; + else if (vfd == multi) + suffix = NULL; + break; + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)iot); + HDassert(0 && "Unknown IO type"); + break; + } + + /* First use the environment variable and then try the constant */ + prefix = HDgetenv("HDF5_PREFIX"); + +#ifdef HDF5_PREFIX + if (!prefix) + prefix = HDF5_PREFIX; +#endif /* HDF5_PREFIX */ + + /* Prepend the prefix value to the base name */ + if (prefix && *prefix) { + /* If the prefix specifies the HDF5_PREFIX directory, then + * default to using the "/tmp/$USER" or "/tmp/$LOGIN" + * directory instead. */ + register char *user, *login, *subdir; + + user = HDgetenv("USER"); + login = HDgetenv("LOGIN"); + subdir = (user ? user : login); + + if (subdir) { + for (i = 0; i < size - 1 && prefix[i]; i++) + fullname[i] = prefix[i]; + + fullname[i++] = '/'; + + for (j = 0; i < size && subdir[j]; i++, j++) + fullname[i] = subdir[j]; + } + else { + /* We didn't append the prefix yet */ + HDstrncpy(fullname, prefix, size); + fullname[size - 1] = '\0'; + } + + if ((HDstrlen(fullname) + HDstrlen(base_name) + 1) < size) { + /* Append the base_name with a slash first. Multiple slashes are + * handled below. */ + h5_stat_t buf; + + if (HDstat(fullname, &buf) < 0) + /* The directory doesn't exist just yet */ + if (HDmkdir(fullname, 0755) < 0 && errno != EEXIST) { + /* We couldn't make the "/tmp/${USER,LOGIN}" subdirectory. + * Default to PREFIX's original prefix value. */ + HDstrcpy(fullname, prefix); + } + + HDstrcat(fullname, "/"); + HDstrcat(fullname, base_name); + } + else { + /* Buffer is too small */ + return NULL; + } + } + else if (strlen(base_name) >= size) { + /* Buffer is too small */ + return NULL; + } + else { + HDstrcpy(fullname, base_name); + } + + /* Append a suffix */ + if (suffix) { + if (HDstrlen(fullname) + HDstrlen(suffix) >= size) + return NULL; + + HDstrcat(fullname, suffix); + } + + /* Remove any double slashes in the filename */ + for (ptr = fullname, i = j = 0; ptr && (i < size); i++, ptr++) { + if (*ptr != '/' || last != '/') + fullname[j++] = *ptr; + + last = *ptr; + } + + return fullname; +} + +/* + * Function: do_write + * Purpose: Write the required amount of data to the file. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ +static herr_t +do_write(results *res, file_descr *fd, parameters *parms, void *buffer) +{ + int ret_code = SUCCESS; + char dname[64]; + int i; + size_t u; + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[MAX_DIMS]; /*dataset dim sizes */ + hsize_t h5chunk[MAX_DIMS]; /*dataset dim sizes */ + hsize_t h5block[MAX_DIMS]; /*dataspace selection */ + hsize_t h5stride[MAX_DIMS]; /*selection stride */ + hsize_t h5start[MAX_DIMS]; /*selection start */ + hsize_t h5maxdims[MAX_DIMS]; + int rank; /*rank of dataset */ + + /* Prepare buffer for verifying data */ + /* if (parms->verify) + memset(buffer,1,linear_buf_size); */ + + buf_p = (unsigned char *)buffer; + + for (u = 0; u < linear_buf_size; u++) + buf_p[u] = u % 128; + + rank = parms->rank; + + for (i = 0; i < rank; i++) + h5offset[i] = offset[i] = 0; + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + + /* determine lowest dimension for contiguous POSIX access */ + cont_dim = rank; + + for (i = rank - 1; i >= 0; i--) { + if (parms->buf_size[i] == parms->dset_size[i]) + cont_dim = i; + else + break; + } + + /* determine size of the contiguous POSIX access */ + cont_size = (!cont_dim) ? 1 : parms->buf_size[cont_dim - 1]; + for (i = cont_dim; i < rank; i++) + cont_size *= parms->buf_size[i]; + + break; + + case HDF5: /* HDF5 setup */ + + for (i = 0; i < rank; i++) { + h5dims[i] = parms->dset_size[i]; + h5start[i] = 0; + h5stride[i] = 1; + h5block[i] = 1; + h5count[i] = parms->buf_size[i]; + h5chunk[i] = parms->chk_size[i]; + h5maxdims[i] = H5S_UNLIMITED; + } + + if (parms->h5_use_chunks && parms->h5_extendable) { + h5dset_space_id = H5Screate_simple(rank, h5count, h5maxdims); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + } + else { + h5dset_space_id = H5Screate_simple(rank, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + } + + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + + /* Create the memory dataspace that corresponds to the xfer buffer */ + h5mem_space_id = H5Screate_simple(rank, h5count, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + break; + + default: + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + GOTOERROR(FAIL); + break; + } /* end switch */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + break; + + case HDF5: + h5dcpl = H5Pcreate(H5P_DATASET_CREATE); + + if (h5dcpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + if (parms->h5_use_chunks) { + /* Set the chunk size to be the same as the buffer size */ + hrc = H5Pset_chunk(h5dcpl, rank, h5chunk); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + + HDsprintf(dname, "Dataset_%ld", (unsigned long)parms->num_bytes); + h5ds_id = + H5Dcreate2(fd->h5fd, dname, ELMT_H5_TYPE, h5dset_space_id, H5P_DEFAULT, h5dcpl, H5P_DEFAULT); + + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset Create failed\n"); + GOTOERROR(FAIL); + } + + hrc = H5Pclose(h5dcpl); + /* verifying the close of the dcpl */ + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + GOTOERROR(FAIL); + break; + } + + /* Start "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTART); + + /* Perform write */ + hrc = dset_write(rank - 1, fd, parms, buffer); + + if (hrc < 0) { + HDfprintf(stderr, "Error in dataset write\n"); + GOTOERROR(FAIL); + } + + /* Stop "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTOP); + + /* Calculate write time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == HDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + +done: + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + return ret_code; +} + +/* + * Function: dset_write + * Purpose: Write buffer into the dataset. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ +static herr_t +dset_write(int local_dim, file_descr *fd, parameters *parms, void *buffer) +{ + int cur_dim = order[local_dim] - 1; + int ret_code = SUCCESS; + int k; + hsize_t dims[MAX_DIMS], maxdims[MAX_DIMS]; + hsize_t i; + int j; + herr_t hrc; + + /* iterates according to the dimensions in order array */ + for (i = 0; i < parms->dset_size[cur_dim]; i += parms->buf_size[cur_dim]) { + + h5offset[cur_dim] = (hssize_t)i; + offset[cur_dim] = (HDoff_t)i; + + if (local_dim > 0) { + + dset_write(local_dim - 1, fd, parms, buffer); + } + else { + + switch (parms->io_type) { + + case POSIXIO: + /* initialize POSIX offset in the buffer */ + for (j = 0; j < parms->rank; j++) + buf_offset[j] = 0; + buf_p = (unsigned char *)buffer; + /* write POSIX buffer */ + posix_buffer_write(0, fd, parms, buffer); + break; + + case HDF5: + /* if dimensions are extendable, extend them as needed during access */ + if (parms->h5_use_chunks && parms->h5_extendable) { + + hrc = H5Sget_simple_extent_dims(h5dset_space_id, dims, maxdims); + VRFY((hrc >= 0), "H5Sget_simple_extent_dims"); + + for (k = 0; k < parms->rank; k++) { + + HDassert(h5offset[k] >= 0); + if (dims[k] <= (hsize_t)h5offset[k]) { + dims[k] = dims[k] + h5count[k]; + hrc = H5Sset_extent_simple(h5dset_space_id, parms->rank, dims, maxdims); + VRFY((hrc >= 0), "H5Sset_extent_simple"); + hrc = H5Dset_extent(h5ds_id, dims); + VRFY((hrc >= 0), "H5Dextend"); + } + } + } + /* applies offset */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = H5Sget_simple_extent_dims(h5dset_space_id, dims, maxdims); + hrc = H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dwrite"); + + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + HDassert(0 && "Unknown IO type"); + break; + } /* switch (parms->io_type) */ + } + } +done: + return ret_code; +} + +/* + * Function: posix_buffer_write + * Purpose: Write buffer into the POSIX file considering contiguity. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ + +static herr_t +posix_buffer_write(int local_dim, file_descr *fd, parameters *parms, void *buffer) +{ + int ret_code = SUCCESS; + + /* if dimension is not contiguous, call recursively */ + if (local_dim < parms->rank - 1 && local_dim != cont_dim) { + size_t u; + + for (u = 0; u < parms->buf_size[local_dim]; u++) { + buf_offset[local_dim] = u; + posix_buffer_write(local_dim + 1, fd, parms, buffer); + + /* if next dimension is cont_dim, it will fill out the buffer + traversing the entire dimension local_dim without the need + of performing iteration */ + if (local_dim + 1 == cont_dim) + break; + } + /* otherwise, perform contiguous POSIX access */ + } + else { + HDoff_t d_offset; + HDoff_t linear_dset_offset = 0; + int i, j, rc; + + buf_offset[local_dim] = 0; + + /* determine offset in the buffer */ + for (i = 0; i < parms->rank; i++) { + d_offset = 1; + + for (j = i + 1; j < parms->rank; j++) + d_offset *= (HDoff_t)parms->dset_size[j]; + + linear_dset_offset += (offset[i] + (HDoff_t)buf_offset[i]) * d_offset; + } + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, linear_dset_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + /* check if all bytes are written */ + rc = ((ssize_t)cont_size == POSIXWRITE(fd->posixfd, buf_p, cont_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p += cont_size; + } + +done: + return ret_code; +} + +/* + * Function: do_read + * Purpose: Read the required amount of data to the file. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ +static herr_t +do_read(results *res, file_descr *fd, parameters *parms, void *buffer) +{ + char * buffer2 = NULL; /* Buffer for data verification */ + int ret_code = SUCCESS; + char dname[64]; + int i; + size_t u; + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[MAX_DIMS]; /*dataset dim sizes */ + hsize_t h5block[MAX_DIMS]; /*dataspace selection */ + hsize_t h5stride[MAX_DIMS]; /*selection stride */ + hsize_t h5start[MAX_DIMS]; /*selection start */ + int rank; + + /* Allocate data verification buffer */ + if (NULL == (buffer2 = (char *)malloc(linear_buf_size))) { + HDfprintf(stderr, "malloc for data verification buffer size (%zu) failed\n", linear_buf_size); + GOTOERROR(FAIL); + } /* end if */ + + /* Prepare buffer for verifying data */ + for (u = 0; u < linear_buf_size; u++) + buffer2[u] = (char)(u % 128); + + rank = parms->rank; + for (i = 0; i < rank; i++) + h5offset[i] = offset[i] = 0; + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + cont_dim = rank; + + for (i = rank - 1; i >= 0; i--) { + if (parms->buf_size[i] == parms->dset_size[i]) + cont_dim = i; + else + break; + } + cont_size = (!cont_dim) ? 1 : parms->buf_size[cont_dim - 1]; + for (i = cont_dim; i < rank; i++) + cont_size *= parms->buf_size[i]; + + break; + + case HDF5: /* HDF5 setup */ + for (i = 0; i < rank; i++) { + h5dims[i] = parms->dset_size[i]; + h5start[i] = 0; + h5stride[i] = 1; + h5block[i] = 1; + h5count[i] = parms->buf_size[i]; + } + + h5dset_space_id = H5Screate_simple(rank, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + + /* Create the memory dataspace that corresponds to the xfer buffer */ + h5mem_space_id = H5Screate_simple(rank, h5count, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + GOTOERROR(FAIL); + break; + } /* end switch */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + break; + + case HDF5: + HDsprintf(dname, "Dataset_%ld", (long)parms->num_bytes); + h5ds_id = H5Dopen2(fd->h5fd, dname, H5P_DEFAULT); + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset open failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + GOTOERROR(FAIL); + break; + } /* end switch */ + + /* Start "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTART); + hrc = dset_read(rank - 1, fd, parms, buffer, buffer2); + + if (hrc < 0) { + HDfprintf(stderr, "Error in dataset read\n"); + GOTOERROR(FAIL); + } + + /* Stop "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTOP); + + /* Calculate read time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == HDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + +done: + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + /* release generic resources */ + if (buffer2) + free(buffer2); + + return ret_code; +} + +/* + * Function: dset_read + * Purpose: Read buffer into the dataset. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ + +static herr_t +dset_read(int local_dim, file_descr *fd, parameters *parms, void *buffer, const char *buffer2) +{ + int cur_dim = order[local_dim] - 1; + hsize_t i; + int j; + herr_t hrc; + int ret_code = SUCCESS; + + /* iterate on the current dimension */ + for (i = 0; i < parms->dset_size[cur_dim]; i += parms->buf_size[cur_dim]) { + + h5offset[cur_dim] = (hssize_t)i; + offset[cur_dim] = (HDoff_t)i; + + /* if traverse in order array is incomplete, recurse */ + if (local_dim > 0) { + + ret_code = dset_read(local_dim - 1, fd, parms, buffer, buffer2); + + /* otherwise, write buffer into dataset */ + } + else { + + switch (parms->io_type) { + + case POSIXIO: + for (j = 0; j < parms->rank; j++) { + buf_offset[j] = 0; + } + buf_p = (unsigned char *)buffer; + posix_buffer_read(0, fd, parms, buffer); + break; + + case HDF5: + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + /* Read the buffer out */ + hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dread"); + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)parms->io_type); + HDassert(0 && "Unknown IO type"); + break; + } /* switch (parms->io_type) */ + } + } +done: + return ret_code; +} + +/* + * Function: posix_buffer_read + * Purpose: Read buffer into the POSIX file considering contiguity. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ + +static herr_t +posix_buffer_read(int local_dim, file_descr *fd, parameters *parms, void *buffer) +{ + int ret_code = SUCCESS; + + /* if local dimension is not contiguous, recurse */ + if (local_dim < parms->rank - 1 && local_dim != cont_dim) { + size_t u; + + for (u = 0; u < parms->buf_size[local_dim]; u++) { + buf_offset[local_dim] = u; + ret_code = posix_buffer_read(local_dim + 1, fd, parms, buffer); + if (local_dim + 1 == cont_dim) + break; + } + /* otherwise, perform contiguous POSIX access */ + } + else { + HDoff_t d_offset; + HDoff_t linear_dset_offset = 0; + int i, j, rc; + + buf_offset[local_dim] = 0; + /* determine offset in buffer */ + for (i = 0; i < parms->rank; i++) { + d_offset = 1; + + for (j = i + 1; j < parms->rank; j++) + d_offset *= (HDoff_t)parms->dset_size[j]; + + linear_dset_offset += (offset[i] + (HDoff_t)buf_offset[i]) * d_offset; + } + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, linear_dset_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + /* check if all bytes are read */ + rc = ((ssize_t)cont_size == POSIXREAD(fd->posixfd, buf_p, cont_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p += cont_size; + } +done: + return ret_code; +} + +/* + * Function: do_fopen + * Purpose: Open the specified file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: Support for file drivers, Christian Chilan, April, 2008 + */ +static herr_t +do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags) +{ + int ret_code = SUCCESS; + hid_t fcpl; + + switch (param->io_type) { + case POSIXIO: + if (flags & (SIO_CREATE | SIO_WRITE)) + fd->posixfd = POSIXCREATE(fname); + else + fd->posixfd = POSIXOPEN(fname, O_RDONLY); + + if (fd->posixfd < 0) { + HDfprintf(stderr, "POSIX File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + break; + + case HDF5: + + fapl = set_vfd(param); + + if (fapl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + fcpl = H5Pcreate(H5P_FILE_CREATE); + if (param->page_size) { + H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 0, (hsize_t)1); + H5Pset_file_space_page_size(fcpl, param->page_size); + if (param->page_buffer_size) + H5Pset_page_buffer_size(fapl, param->page_buffer_size, 0, 0); + } + + /* create the parallel file */ + if (flags & (SIO_CREATE | SIO_WRITE)) { + fd->h5fd = H5Fcreate(fname, H5F_ACC_TRUNC, fcpl, fapl); + } + else { + fd->h5fd = H5Fopen(fname, H5F_ACC_RDONLY, fapl); + } + + if (fd->h5fd < 0) { + HDfprintf(stderr, "HDF5 File Create failed(%s)\n", fname); + GOTOERROR(FAIL); + } + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)param->io_type); + GOTOERROR(FAIL); + break; + } + +done: + return ret_code; +} + +/* + * Function: set_vfd + * Purpose: Sets file driver. + * Return: SUCCESS or FAIL + * Programmer: Christian Chilan, April, 2008 + * Modifications: + */ + +hid_t +set_vfd(parameters *param) +{ + hid_t my_fapl = H5I_INVALID_HID; + vfdtype vfd; + + vfd = param->vfd; + + if ((my_fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0) + return -1; + + if (vfd == sec2) { + /* Unix read() and write() system calls */ + if (H5Pset_fapl_sec2(my_fapl) < 0) + return -1; + } + else if (vfd == stdio) { + /* Standard C fread() and fwrite() system calls */ + if (H5Pset_fapl_stdio(my_fapl) < 0) + return -1; + } + else if (vfd == core) { + /* In-core temporary file with 1MB increment */ + if (H5Pset_fapl_core(my_fapl, (size_t)1024 * 1024, TRUE) < 0) + return -1; + } + else if (vfd == split) { + /* Split meta data and raw data each using default driver */ + if (H5Pset_fapl_split(my_fapl, "-m.h5", H5P_DEFAULT, "-r.h5", H5P_DEFAULT) < 0) + return -1; + } + else if (vfd == multi) { + /* Multi-file driver, general case of the split driver */ + H5FD_mem_t memb_map[H5FD_MEM_NTYPES]; + hid_t memb_fapl[H5FD_MEM_NTYPES]; + const char *memb_name[H5FD_MEM_NTYPES]; + char sv[H5FD_MEM_NTYPES][1024]; + haddr_t memb_addr[H5FD_MEM_NTYPES]; + H5FD_mem_t mt; + + HDmemset(memb_map, 0, sizeof memb_map); + HDmemset(memb_fapl, 0, sizeof memb_fapl); + HDmemset(memb_name, 0, sizeof memb_name); + HDmemset(memb_addr, 0, sizeof memb_addr); + + HDassert(HDstrlen(multi_letters) == H5FD_MEM_NTYPES); + for (mt = H5FD_MEM_DEFAULT; mt < H5FD_MEM_NTYPES; mt++) { + memb_fapl[mt] = H5P_DEFAULT; + HDsprintf(sv[mt], "%%s-%c.h5", multi_letters[mt]); + memb_name[mt] = sv[mt]; + memb_addr[mt] = (haddr_t)MAX(mt - 1, 0) * (HADDR_MAX / 10); + } + + if (H5Pset_fapl_multi(my_fapl, memb_map, memb_fapl, memb_name, memb_addr, FALSE) < 0) { + return -1; + } + } + else if (vfd == family) { + hsize_t fam_size = 1 * 1024 * 1024; /*100 MB*/ + + /* Family of files, each 1MB and using the default driver */ + /* if ((val=HDstrtok(NULL, " \t\n\r"))) + fam_size = (hsize_t)(HDstrtod(val, NULL) * 1024*1024); */ + if (H5Pset_fapl_family(my_fapl, fam_size, H5P_DEFAULT) < 0) + return -1; + } + else if (vfd == direct) { +#ifdef H5_HAVE_DIRECT + /* Linux direct read() and write() system calls. Set memory boundary, file block size, + * and copy buffer size to the default values. */ + if (H5Pset_fapl_direct(my_fapl, 1024, 4096, 8 * 4096) < 0) + return -1; +#endif + } + else { + /* Unknown driver */ + return -1; + } + + return my_fapl; +} + +/* + * Function: do_fclose + * Purpose: Close the specified file descriptor. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + */ +static herr_t +do_fclose(iotype iot, file_descr *fd /*out*/) +{ + herr_t ret_code = SUCCESS, hrc; + int rc = 0; + + switch (iot) { + case POSIXIO: + rc = POSIXCLOSE(fd->posixfd); + + if (rc != 0) { + HDfprintf(stderr, "POSIX File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->posixfd = -1; + break; + + case HDF5: + hrc = H5Fclose(fd->h5fd); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->h5fd = -1; + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)iot); + GOTOERROR(FAIL); + break; + } + +done: + return ret_code; +} + +/* + * Function: do_cleanupfile + * Purpose: Cleanup temporary file unless HDF5_NOCLEANUP is set. + * Return: void + * Programmer: Albert Cheng 2001/12/12 + * Modifications: Support for file drivers. Christian Chilan, April, 2008 + */ +static void +do_cleanupfile(iotype iot, char *filename) +{ + char temp[2048]; + int j; + hid_t driver; + + if (clean_file_g == -1) + clean_file_g = (HDgetenv("HDF5_NOCLEANUP") == NULL) ? 1 : 0; + + if (clean_file_g) { + + switch (iot) { + case POSIXIO: + HDremove(filename); + break; + + case HDF5: + driver = H5Pget_driver(fapl); + + if (driver == H5FD_FAMILY) { + for (j = 0; /*void*/; j++) { + HDsnprintf(temp, sizeof temp, filename, j); + + if (HDaccess(temp, F_OK) < 0) + break; + + HDremove(temp); + } + } + else if (driver == H5FD_CORE) { + hbool_t backing; /* Whether the core file has backing store */ + + H5Pget_fapl_core(fapl, NULL, &backing); + + /* If the file was stored to disk with bacing store, remove it */ + if (backing) + HDremove(filename); + } + else if (driver == H5FD_MULTI) { + H5FD_mem_t mt; + assert(HDstrlen(multi_letters) == H5FD_MEM_NTYPES); + + for (mt = H5FD_MEM_DEFAULT; mt < H5FD_MEM_NTYPES; mt++) { + HDsnprintf(temp, sizeof temp, "%s-%c.h5", filename, multi_letters[mt]); + HDremove(temp); /*don't care if it fails*/ + } + } + else { + HDremove(filename); + } + H5Pclose(fapl); + break; + + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)iot); + HDassert(0 && "Unknown IO type"); + break; + } + } +} diff --git a/tools/src/h5perf/sio_perf.c b/tools/src/h5perf/sio_perf.c new file mode 100644 index 0000000..51a7825 --- /dev/null +++ b/tools/src/h5perf/sio_perf.c @@ -0,0 +1,1437 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Serial HDF5 Performance Testing Code + * -------------------------------------- + * + * Portable code to test performance on the different platforms we support. + * This is what the report should look like: + * + * nprocs = Max#Procs + * IO API = POSIXIO + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * + * IO API = HDF5 + * # Files = 1, # of dsets = 1000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * # Files = 1, # of dsets = 3000, Elements per dset = 37000 + * Write Results = x MB/s + * Read Results = x MB/s + * + * . . . + * + * + * . . . + * + */ + +/* system header files */ +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +#include "hdf5.h" + +/* our header files */ +#include "sio_perf.h" + +/* useful macros */ +#define TAB_SPACE 4 + +#define ONE_KB 1024 +#define ONE_MB (ONE_KB * ONE_KB) +#define ONE_GB (ONE_MB * ONE_KB) + +#define SIO_POSIX 0x1 +#define SIO_HDF5 0x4 + +/* report 0.0 in case t is zero too */ +#define MB_PER_SEC(bytes, t) (H5_DBL_ABS_EQUAL(t, 0.0) ? 0.0 : ((((double)bytes) / (double)ONE_MB) / (t))) + +#ifndef TRUE +#define TRUE 1 +#endif /* TRUE */ +#ifndef FALSE +#define FALSE (!TRUE) +#endif /* FALSE */ + +/* global variables */ +FILE *output; /* output file */ +int sio_debug_level = 0; /* The debug level: + * 0 - Off + * 1 - Minimal + * 2 - Some more + * 3 - Maximal + * 4 - Maximal & then some + */ + +/* local variables */ +static const char *progname = "h5perf_serial"; + +/* + * Command-line options: The user can specify short or long-named + * parameters. The long-named ones can be partially spelled. When + * adding more, make sure that they don't clash with each other. + */ + +/* + * It seems that only the options that accept additional information + * such as dataset size (-e) require the colon next to it. + */ +static const char * s_opts = "a:A:B:c:Cd:D:e:F:ghi:Imno:p:P:r:stT:v:wx:X:"; +static struct h5_long_options l_opts[] = {{"align", require_arg, 'a'}, + {"alig", require_arg, 'a'}, + {"ali", require_arg, 'a'}, + {"al", require_arg, 'a'}, + {"api", require_arg, 'A'}, + {"ap", require_arg, 'A'}, +#if 0 + /* a sighting of the elusive binary option */ + { "binary", no_arg, 'b' }, + { "binar", no_arg, 'b' }, + { "bina", no_arg, 'b' }, + { "bin", no_arg, 'b' }, + { "bi", no_arg, 'b' }, +#endif /* 0 */ + {"block-size", require_arg, 'B'}, + {"block-siz", require_arg, 'B'}, + {"block-si", require_arg, 'B'}, + {"block-s", require_arg, 'B'}, + {"block-", require_arg, 'B'}, + {"block", require_arg, 'B'}, + {"bloc", require_arg, 'B'}, + {"blo", require_arg, 'B'}, + {"bl", require_arg, 'B'}, + {"chunk", no_arg, 'c'}, + {"chun", no_arg, 'c'}, + {"chu", no_arg, 'c'}, + {"ch", no_arg, 'c'}, + {"collective", no_arg, 'C'}, + {"collectiv", no_arg, 'C'}, + {"collecti", no_arg, 'C'}, + {"collect", no_arg, 'C'}, + {"collec", no_arg, 'C'}, + {"colle", no_arg, 'C'}, + {"coll", no_arg, 'C'}, + {"col", no_arg, 'C'}, + {"co", no_arg, 'C'}, + {"debug", require_arg, 'D'}, + {"debu", require_arg, 'D'}, + {"deb", require_arg, 'D'}, + {"de", require_arg, 'D'}, + {"file-driver", require_arg, 'v'}, + {"file-drive", require_arg, 'v'}, + {"file-driv", require_arg, 'v'}, + {"file-dri", require_arg, 'v'}, + {"file-dr", require_arg, 'v'}, + {"file-d", require_arg, 'v'}, + {"file-", require_arg, 'v'}, + {"file", require_arg, 'v'}, + {"fil", require_arg, 'v'}, + {"fi", require_arg, 'v'}, + {"geometry", no_arg, 'g'}, + {"geometr", no_arg, 'g'}, + {"geomet", no_arg, 'g'}, + {"geome", no_arg, 'g'}, + {"geom", no_arg, 'g'}, + {"geo", no_arg, 'g'}, + {"ge", no_arg, 'g'}, + {"help", no_arg, 'h'}, + {"hel", no_arg, 'h'}, + {"he", no_arg, 'h'}, + {"interleaved", require_arg, 'I'}, + {"interleave", require_arg, 'I'}, + {"interleav", require_arg, 'I'}, + {"interlea", require_arg, 'I'}, + {"interle", require_arg, 'I'}, + {"interl", require_arg, 'I'}, + {"inter", require_arg, 'I'}, + {"inte", require_arg, 'I'}, + {"int", require_arg, 'I'}, + {"in", require_arg, 'I'}, + {"max-num-processes", require_arg, 'P'}, + {"max-num-processe", require_arg, 'P'}, + {"max-num-process", require_arg, 'P'}, + {"max-num-proces", require_arg, 'P'}, + {"max-num-proce", require_arg, 'P'}, + {"max-num-proc", require_arg, 'P'}, + {"max-num-pro", require_arg, 'P'}, + {"max-num-pr", require_arg, 'P'}, + {"max-num-p", require_arg, 'P'}, + {"min-num-processes", require_arg, 'p'}, + {"min-num-processe", require_arg, 'p'}, + {"min-num-process", require_arg, 'p'}, + {"min-num-proces", require_arg, 'p'}, + {"min-num-proce", require_arg, 'p'}, + {"min-num-proc", require_arg, 'p'}, + {"min-num-pro", require_arg, 'p'}, + {"min-num-pr", require_arg, 'p'}, + {"min-num-p", require_arg, 'p'}, + {"max-xfer-size", require_arg, 'X'}, + {"max-xfer-siz", require_arg, 'X'}, + {"max-xfer-si", require_arg, 'X'}, + {"max-xfer-s", require_arg, 'X'}, + {"max-xfer", require_arg, 'X'}, + {"max-xfe", require_arg, 'X'}, + {"max-xf", require_arg, 'X'}, + {"max-x", require_arg, 'X'}, + {"min-xfer-size", require_arg, 'x'}, + {"min-xfer-siz", require_arg, 'x'}, + {"min-xfer-si", require_arg, 'x'}, + {"min-xfer-s", require_arg, 'x'}, + {"min-xfer", require_arg, 'x'}, + {"min-xfe", require_arg, 'x'}, + {"min-xf", require_arg, 'x'}, + {"min-x", require_arg, 'x'}, + {"num-bytes", require_arg, 'e'}, + {"num-byte", require_arg, 'e'}, + {"num-byt", require_arg, 'e'}, + {"num-by", require_arg, 'e'}, + {"num-b", require_arg, 'e'}, + {"num-dsets", require_arg, 'd'}, + {"num-dset", require_arg, 'd'}, + {"num-dse", require_arg, 'd'}, + {"num-ds", require_arg, 'd'}, + {"num-d", require_arg, 'd'}, + {"num-files", require_arg, 'F'}, + {"num-file", require_arg, 'F'}, + {"num-fil", require_arg, 'F'}, + {"num-fi", require_arg, 'F'}, + {"num-f", require_arg, 'F'}, + {"num-iterations", require_arg, 'i'}, + {"num-iteration", require_arg, 'i'}, + {"num-iteratio", require_arg, 'i'}, + {"num-iterati", require_arg, 'i'}, + {"num-iterat", require_arg, 'i'}, + {"num-itera", require_arg, 'i'}, + {"num-iter", require_arg, 'i'}, + {"num-ite", require_arg, 'i'}, + {"num-it", require_arg, 'i'}, + {"num-i", require_arg, 'i'}, + {"order", require_arg, 'r'}, + {"orde", require_arg, 'r'}, + {"ord", require_arg, 'r'}, + {"or", require_arg, 'r'}, + {"output", require_arg, 'o'}, + {"outpu", require_arg, 'o'}, + {"outp", require_arg, 'o'}, + {"out", require_arg, 'o'}, + {"ou", require_arg, 'o'}, + {"extendable", no_arg, 't'}, + {"extendabl", no_arg, 't'}, + {"extendab", no_arg, 't'}, + {"extenda", no_arg, 't'}, + {"extend", no_arg, 't'}, + {"exten", no_arg, 't'}, + {"exte", no_arg, 't'}, + {"ext", no_arg, 't'}, + {"ex", no_arg, 't'}, + {"threshold", require_arg, 'T'}, + {"threshol", require_arg, 'T'}, + {"thresho", require_arg, 'T'}, + {"thresh", require_arg, 'T'}, + {"thres", require_arg, 'T'}, + {"thre", require_arg, 'T'}, + {"thr", require_arg, 'T'}, + {"th", require_arg, 'T'}, + {"write-only", require_arg, 'w'}, + {"write-onl", require_arg, 'w'}, + {"write-on", require_arg, 'w'}, + {"write-o", require_arg, 'w'}, + {"write", require_arg, 'w'}, + {"writ", require_arg, 'w'}, + {"wri", require_arg, 'w'}, + {"wr", require_arg, 'w'}, + {NULL, 0, '\0'}}; + +struct options { + long io_types; /* bitmask of which I/O types to test */ + const char *output_file; /* file to print report to */ + long num_dsets; /* number of datasets */ + long num_files; /* number of files */ + off_t num_bpp; /* number of bytes per proc per dset */ + int num_iters; /* number of iterations */ + hsize_t dset_size[MAX_DIMS]; /* Dataset size */ + size_t buf_size[MAX_DIMS]; /* Buffer size */ + size_t chk_size[MAX_DIMS]; /* Chunk size */ + int order[MAX_DIMS]; /* Dimension access order */ + int dset_rank; /* Rank */ + int buf_rank; /* Rank */ + int order_rank; /* Rank */ + int chk_rank; /* Rank */ + int print_times; /* print times as well as throughputs */ + int print_raw; /* print raw data throughput info */ + hsize_t h5_alignment; /* alignment in HDF5 file */ + hsize_t h5_threshold; /* threshold for alignment in HDF5 file */ + int h5_use_chunks; /* Make HDF5 dataset chunked */ + int h5_write_only; /* Perform the write tests only */ + int h5_extendable; /* Perform the write tests only */ + int verify; /* Verify data correctness */ + vfdtype vfd; /* File driver */ + size_t page_buffer_size; + size_t page_size; +}; + +typedef struct { + double min; + double max; + double sum; + int num; +} minmax; + +/* local functions */ +static hsize_t parse_size_directive(const char *size); +static struct options *parse_command_line(int argc, const char *argv[]); +static void run_test_loop(struct options *options); +static int run_test(iotype iot, parameters parms, struct options *opts); +static void output_all_info(minmax *mm, int count, int indent_level); +static void get_minmax(minmax *mm, double val); +static void accumulate_minmax_stuff(const minmax *mm, int count, minmax *total_mm); +static void output_results(const struct options *options, const char *name, minmax *table, int table_size, + off_t data_size); +static void output_report(const char *fmt, ...); +static void print_indent(register int indent); +static void usage(const char *prog); +static void report_parameters(struct options *opts); + +/* + * Function: main + * Purpose: Start things up. + * Return: EXIT_SUCCESS or EXIT_FAILURE + * Programmer: Bill Wendling, 30. October 2001 + * Modifications: + */ +int +main(int argc, const char *argv[]) +{ + int exit_value = EXIT_SUCCESS; + struct options *opts = NULL; + +#ifndef STANDALONE + /* Initialize h5tools lib */ + h5tools_init(); +#endif + + output = stdout; + + opts = parse_command_line(argc, argv); + + if (!opts) { + exit_value = EXIT_FAILURE; + goto finish; + } + + if (opts->output_file) { + if ((output = HDfopen(opts->output_file, "w")) == NULL) { + HDfprintf(stderr, "%s: cannot open output file\n", progname); + HDperror(opts->output_file); + goto finish; + } + } + + report_parameters(opts); + + run_test_loop(opts); + +finish: + HDfree(opts); + return exit_value; +} + +/* + * Function: run_test_loop + * Purpose: Run the I/O tests. Write the results to OUTPUT. + * + * - The slowest changing part of the test is the number of + * processors to use. For each loop iteration, we divide that + * number by 2 and rerun the test. + * + * - The second slowest is what type of IO API to perform. We have + * three choices: POSIXIO, and HDF5. + * + * - Then we change the size of the buffer. This information is + * inferred from the number of datasets to create and the number + * of integers to put into each dataset. The backend code figures + * this out. + * + * Return: Nothing + * Programmer: Bill Wendling, 30. October 2001 + * Modifications: + * Added multidimensional testing (Christian Chilan, April, 2008) + */ +static void +run_test_loop(struct options *opts) +{ + parameters parms; + int i; + size_t buf_bytes; + + /* load options into parameter structure */ + parms.num_files = opts->num_files; + parms.num_dsets = opts->num_dsets; + parms.num_iters = opts->num_iters; + parms.rank = opts->dset_rank; + parms.h5_align = opts->h5_alignment; + parms.h5_thresh = opts->h5_threshold; + parms.h5_use_chunks = opts->h5_use_chunks; + parms.h5_extendable = opts->h5_extendable; + parms.h5_write_only = opts->h5_write_only; + parms.verify = opts->verify; + parms.vfd = opts->vfd; + parms.page_buffer_size = opts->page_buffer_size; + parms.page_size = opts->page_size; + + /* load multidimensional options */ + parms.num_bytes = 1; + buf_bytes = 1; + for (i = 0; i < parms.rank; i++) { + parms.buf_size[i] = opts->buf_size[i]; + parms.dset_size[i] = opts->dset_size[i]; + parms.chk_size[i] = opts->chk_size[i]; + parms.order[i] = opts->order[i]; + parms.num_bytes *= opts->dset_size[i]; + buf_bytes *= opts->buf_size[i]; + } + + /* print size information */ + output_report("Transfer Buffer Size (bytes): %d\n", buf_bytes); + output_report("File Size(MB): %.2f\n", ((double)parms.num_bytes) / ONE_MB); + + print_indent(0); + if (opts->io_types & SIO_POSIX) + run_test(POSIXIO, parms, opts); + + print_indent(0); + if (opts->io_types & SIO_HDF5) + run_test(HDF5, parms, opts); +} + +/* + * Function: run_test + * Purpose: Inner loop call to actually run the I/O test. + * Return: Nothing + * Programmer: Bill Wendling, 18. December 2001 + * Modifications: + */ +static int +run_test(iotype iot, parameters parms, struct options *opts) +{ + results res; + register int i, ret_value = SUCCESS; + off_t raw_size; + minmax * write_sys_mm_table = NULL; + minmax * write_mm_table = NULL; + minmax * write_gross_mm_table = NULL; + minmax * write_raw_mm_table = NULL; + minmax * read_sys_mm_table = NULL; + minmax * read_mm_table = NULL; + minmax * read_gross_mm_table = NULL; + minmax * read_raw_mm_table = NULL; + minmax write_sys_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax write_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax write_gross_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax write_raw_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax read_sys_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax read_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax read_gross_mm = {0.0F, 0.0F, 0.0F, 0}; + minmax read_raw_mm = {0.0F, 0.0F, 0.0F, 0}; + + raw_size = (off_t)parms.num_bytes; + parms.io_type = iot; + print_indent(2); + output_report("IO API = "); + + switch (iot) { + case POSIXIO: + output_report("POSIX\n"); + break; + case HDF5: + output_report("HDF5\n"); + break; + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", (int)iot); + HDassert(0 && "Unknown IO tpe"); + break; + } + + /* allocate space for tables minmax and that it is sufficient */ + /* to initialize all elements to zeros by calloc. */ + write_sys_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + write_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + write_gross_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + write_raw_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + + if (!parms.h5_write_only) { + read_sys_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + read_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + read_gross_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + read_raw_mm_table = (minmax *)calloc((size_t)parms.num_iters, sizeof(minmax)); + } + + /* Do IO iteration times, collecting statistics each time */ + for (i = 0; i < parms.num_iters; ++i) { + double t; + + do_sio(parms, &res); + + /* gather all of the "sys write" times */ + t = io_time_get(res.timers, HDF5_MPI_WRITE); + get_minmax(&write_sys_mm, t); + + write_sys_mm_table[i] = write_sys_mm; + + /* gather all of the "write" times */ + t = io_time_get(res.timers, HDF5_FINE_WRITE_FIXED_DIMS); + get_minmax(&write_mm, t); + + write_mm_table[i] = write_mm; + + /* gather all of the "write" times from open to close */ + t = io_time_get(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS); + get_minmax(&write_gross_mm, t); + + write_gross_mm_table[i] = write_gross_mm; + + /* gather all of the raw "write" times */ + t = io_time_get(res.timers, HDF5_RAW_WRITE_FIXED_DIMS); + get_minmax(&write_raw_mm, t); + + write_raw_mm_table[i] = write_raw_mm; + + if (!parms.h5_write_only) { + /* gather all of the "mpi read" times */ + t = io_time_get(res.timers, HDF5_MPI_READ); + get_minmax(&read_sys_mm, t); + + read_sys_mm_table[i] = read_sys_mm; + + /* gather all of the "read" times */ + t = io_time_get(res.timers, HDF5_FINE_READ_FIXED_DIMS); + get_minmax(&read_mm, t); + + read_mm_table[i] = read_mm; + + /* gather all of the "read" times from open to close */ + t = io_time_get(res.timers, HDF5_GROSS_READ_FIXED_DIMS); + get_minmax(&read_gross_mm, t); + + read_gross_mm_table[i] = read_gross_mm; + + /* gather all of the raw "read" times */ + t = io_time_get(res.timers, HDF5_RAW_READ_FIXED_DIMS); + get_minmax(&read_raw_mm, t); + + read_raw_mm_table[i] = read_gross_mm; + } + io_time_destroy(res.timers); + } + + /* + * Show various statistics + */ + /* Write statistics */ + /* Print the raw data throughput if desired */ + if (opts->print_raw) { + /* accumulate and output the max, min, and average "raw write" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Write details:\n"); + output_all_info(write_raw_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Raw Data Write", write_raw_mm_table, parms.num_iters, raw_size); + } /* end if */ + + /* show sys write statics */ +#if 0 + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("MPI Write details:\n"); + output_all_info(write_sys_mm_table, parms.num_iters, 4); + } +#endif + /* We don't currently output the MPI write results */ + + /* accumulate and output the max, min, and average "write" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write details:\n"); + output_all_info(write_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Write", write_mm_table, parms.num_iters, raw_size); + + /* accumulate and output the max, min, and average "gross write" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Write Open-Close details:\n"); + output_all_info(write_gross_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Write Open-Close", write_gross_mm_table, parms.num_iters, raw_size); + + if (!parms.h5_write_only) { + /* Read statistics */ + /* Print the raw data throughput if desired */ + if (opts->print_raw) { + /* accumulate and output the max, min, and average "raw read" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Raw Data Read details:\n"); + output_all_info(read_raw_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Raw Data Read", read_raw_mm_table, parms.num_iters, raw_size); + } /* end if */ + + /* show mpi read statics */ +#if 0 + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("MPI Read details:\n"); + output_all_info(read_sys_mm_table, parms.num_iters, 4); + } +#endif + /* We don't currently output the MPI read results */ + + /* accumulate and output the max, min, and average "read" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read details:\n"); + output_all_info(read_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Read", read_mm_table, parms.num_iters, raw_size); + + /* accumulate and output the max, min, and average "gross read" times */ + if (sio_debug_level >= 3) { + /* output all of the times for all iterations */ + print_indent(3); + output_report("Read Open-Close details:\n"); + output_all_info(read_gross_mm_table, parms.num_iters, 4); + } + + output_results(opts, "Read Open-Close", read_gross_mm_table, parms.num_iters, raw_size); + } + + /* clean up our mess */ + HDfree(write_sys_mm_table); + HDfree(write_mm_table); + HDfree(write_gross_mm_table); + HDfree(write_raw_mm_table); + + if (!parms.h5_write_only) { + HDfree(read_sys_mm_table); + HDfree(read_mm_table); + HDfree(read_gross_mm_table); + HDfree(read_raw_mm_table); + } + + return ret_value; +} + +/* + * Function: output_all_info + * Purpose: + * Return: Nothing + * Programmer: Bill Wendling, 29. January 2002 + * Modifications: + */ +static void +output_all_info(minmax *mm, int count, int indent_level) +{ + int i; + + for (i = 0; i < count; ++i) { + print_indent(indent_level); + output_report("Iteration %d:\n", i + 1); + print_indent(indent_level + 1); + output_report("Minimum Time: %.2fs\n", mm[i].min); + print_indent(indent_level + 1); + output_report("Maximum Time: %.2fs\n", mm[i].max); + } +} + +/* + * Function: get_minmax + * Purpose: Gather all the min, max and total of val. + * Return: Nothing + * Programmer: Bill Wendling, 21. December 2001 + * Modifications: + * Use MPI_Allreduce to do it. -akc, 2002/01/11 + */ + +static void +get_minmax(minmax *mm, double val) +{ + mm->max = val; + mm->min = val; + mm->sum = val; +} + +/* + * Function: accumulate_minmax_stuff + * Purpose: Accumulate the minimum, maximum, and average of the times + * across all processes. + * Return: TOTAL_MM - the total of all of these. + * Programmer: Bill Wendling, 21. December 2001 + * Modifications: + * Changed to use seconds instead of MB/s - QAK, 5/9/02 + */ +static void +accumulate_minmax_stuff(const minmax *mm, int count, minmax *total_mm) +{ + int i; + + total_mm->sum = 0.0F; + total_mm->max = -DBL_MAX; + total_mm->min = DBL_MAX; + total_mm->num = count; + + for (i = 0; i < count; ++i) { + double m = mm[i].max; + + total_mm->sum += m; + + if (m < total_mm->min) + total_mm->min = m; + + if (m > total_mm->max) + total_mm->max = m; + } +} + +/* + * Function: output_results + * Purpose: Print information about the time & bandwidth for a given + * minmax & # of iterations. + * Return: Nothing + * Programmer: Quincey Koziol, 9. May 2002 + * Modifications: + */ +static void +output_results(const struct options *opts, const char *name, minmax *table, int table_size, off_t data_size) +{ + minmax total_mm; + + accumulate_minmax_stuff(table, table_size, &total_mm); + + print_indent(3); + output_report("%s (%d iteration(s)):\n", name, table_size); + + /* Note: The maximum throughput uses the minimum amount of time & vice versa */ + + print_indent(4); + output_report("Maximum Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.min)); + if (opts->print_times) + output_report(" (%7.3f s)\n", total_mm.min); + else + output_report("\n"); + + print_indent(4); + output_report("Average Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.sum / total_mm.num)); + if (opts->print_times) + output_report(" (%7.3f s)\n", (total_mm.sum / total_mm.num)); + else + output_report("\n"); + + print_indent(4); + output_report("Minimum Throughput: %6.2f MB/s", MB_PER_SEC(data_size, total_mm.max)); + if (opts->print_times) + output_report(" (%7.3f s)\n", total_mm.max); + else + output_report("\n"); +} + +/* + * Function: output_report + * Purpose: Print a line of the report. Only do so if I'm the 0 process. + * Return: Nothing + * Programmer: Bill Wendling, 19. December 2001 + * Modifications: + */ +static void +output_report(const char *fmt, ...) +{ + va_list ap; + + HDva_start(ap, fmt); + HDvfprintf(output, fmt, ap); + HDva_end(ap); +} + +/* + * Function: print_indent + * Purpose: Print spaces to indent a new line of text for pretty printing + * things. + * Return: Nothing + * Programmer: Bill Wendling, 29. October 2001 + * Modifications: + */ +static void +print_indent(register int indent) +{ + indent *= TAB_SPACE; + + for (; indent > 0; --indent) + HDfputc(' ', output); +} + +static void +recover_size_and_print(long long val, const char *end) +{ + if (val >= ONE_KB && (val % ONE_KB) == 0) { + if (val >= ONE_MB && (val % ONE_MB) == 0) { + if (val >= ONE_GB && (val % ONE_GB) == 0) + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "GB%s", + val / ONE_GB, end); + else + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "MB%s", + val / ONE_MB, end); + } + else { + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "KB%s", + val / ONE_KB, end); + } + } + else { + HDfprintf(output, + "%" H5_PRINTF_LL_WIDTH "d" + "%s", + val, end); + } +} + +static void +print_io_api(long io_types) +{ + if (io_types & SIO_POSIX) + HDfprintf(output, "posix "); + if (io_types & SIO_HDF5) + HDfprintf(output, "hdf5 "); + HDfprintf(output, "\n"); +} + +static void +report_parameters(struct options *opts) +{ + int i, rank; + rank = opts->dset_rank; + + print_version("HDF5 Library"); /* print library version */ + HDfprintf(output, "==== Parameters ====\n"); + + HDfprintf(output, "IO API="); + print_io_api(opts->io_types); + + HDfprintf(output, "Number of iterations=%d\n", opts->num_iters); + + HDfprintf(output, "Dataset size="); + + for (i = 0; i < rank; i++) + recover_size_and_print((long long)opts->dset_size[i], " "); + HDfprintf(output, "\n"); + + HDfprintf(output, "Transfer buffer size="); + for (i = 0; i < rank; i++) + recover_size_and_print((long long)opts->buf_size[i], " "); + HDfprintf(output, "\n"); + + if (opts->page_size) { + HDfprintf(output, "Page Aggregation Enabled. Page size = %zu\n", opts->page_size); + if (opts->page_buffer_size) + HDfprintf(output, "Page Buffering Enabled. Page Buffer size = %zu\n", opts->page_buffer_size); + else + HDfprintf(output, "Page Buffering Disabled\n"); + } + else + HDfprintf(output, "Page Aggregation Disabled\n"); + + HDfprintf(output, "Dimension access order="); + for (i = 0; i < rank; i++) + recover_size_and_print((long long)opts->order[i], " "); + HDfprintf(output, "\n"); + + if (opts->io_types & SIO_HDF5) { + + HDfprintf(output, "HDF5 data storage method="); + + if (opts->h5_use_chunks) { + + HDfprintf(output, "Chunked\n"); + HDfprintf(output, "HDF5 chunk size="); + for (i = 0; i < rank; i++) + recover_size_and_print((long long)opts->chk_size[i], " "); + HDfprintf(output, "\n"); + + HDfprintf(output, "HDF5 dataset dimensions="); + if (opts->h5_extendable) { + HDfprintf(output, "Extendable\n"); + } + else { + HDfprintf(output, "Fixed\n"); + } + } + else { + HDfprintf(output, "Contiguous\n"); + } + + HDfprintf(output, "HDF5 file driver="); + if (opts->vfd == sec2) { + HDfprintf(output, "sec2\n"); + } + else if (opts->vfd == stdio) { + HDfprintf(output, "stdio\n"); + } + else if (opts->vfd == core) { + HDfprintf(output, "core\n"); + } + else if (opts->vfd == split) { + HDfprintf(output, "split\n"); + } + else if (opts->vfd == multi) { + HDfprintf(output, "multi\n"); + } + else if (opts->vfd == family) { + HDfprintf(output, "family\n"); + } + else if (opts->vfd == direct) { + HDfprintf(output, "direct\n"); + } + } + + { + char *prefix = HDgetenv("HDF5_PREFIX"); + + HDfprintf(output, "Env HDF5_PREFIX=%s\n", (prefix ? prefix : "not set")); + } + + HDfprintf(output, "==== End of Parameters ====\n"); + HDfprintf(output, "\n"); +} + +/* + * Function: parse_command_line + * Purpose: Parse the command line options and return a STRUCT OPTIONS + * structure which will need to be freed by the calling function. + * Return: Pointer to an OPTIONS structure + * Programmer: Bill Wendling, 31. October 2001 + * Modifications: + * Added multidimensional testing (Christian Chilan, April, 2008) + */ +static struct options * +parse_command_line(int argc, const char *argv[]) +{ + int opt; + struct options *cl_opts; + int i, default_rank, actual_rank, ranks[4]; + + cl_opts = (struct options *)HDmalloc(sizeof(struct options)); + + cl_opts->page_buffer_size = 0; + cl_opts->page_size = 0; + + cl_opts->output_file = NULL; + cl_opts->io_types = 0; /* will set default after parsing options */ + cl_opts->num_iters = 1; + + default_rank = 2; + + cl_opts->dset_rank = 0; + cl_opts->buf_rank = 0; + cl_opts->chk_rank = 0; + cl_opts->order_rank = 0; + + for (i = 0; i < MAX_DIMS; i++) { + cl_opts->buf_size[i] = (size_t)((i + 1) * 10); + cl_opts->dset_size[i] = (hsize_t)((i + 1) * 100); + cl_opts->chk_size[i] = (size_t)((i + 1) * 10); + cl_opts->order[i] = i + 1; + } + + cl_opts->vfd = sec2; + + cl_opts->print_times = FALSE; /* Printing times is off by default */ + cl_opts->print_raw = FALSE; /* Printing raw data throughput is off by default */ + cl_opts->h5_alignment = 1; /* No alignment for HDF5 objects by default */ + cl_opts->h5_threshold = 1; /* No threshold for aligning HDF5 objects by default */ + cl_opts->h5_use_chunks = FALSE; /* Don't chunk the HDF5 dataset by default */ + cl_opts->h5_write_only = FALSE; /* Do both read and write by default */ + cl_opts->h5_extendable = FALSE; /* Use extendable dataset */ + cl_opts->verify = FALSE; /* No Verify data correctness by default */ + + while ((opt = H5_get_option(argc, argv, s_opts, l_opts)) != EOF) { + switch ((char)opt) { + case 'a': + cl_opts->h5_alignment = parse_size_directive(H5_optarg); + break; + case 'G': + cl_opts->page_size = parse_size_directive(H5_optarg); + break; + case 'b': + cl_opts->page_buffer_size = parse_size_directive(H5_optarg); + break; + case 'A': { + const char *end = H5_optarg; + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + if (!HDstrcasecmp(buf, "hdf5")) { + cl_opts->io_types |= SIO_HDF5; + } + else if (!HDstrcasecmp(buf, "posix")) { + cl_opts->io_types |= SIO_POSIX; + } + else { + HDfprintf(stderr, "sio_perf: invalid --api option %s\n", buf); + HDexit(EXIT_FAILURE); + } + + if (*end == '\0') + break; + + end++; + } + } + + break; +#if 0 + case 'b': + /* the future "binary" option */ + break; +#endif /* 0 */ + case 'c': + /* Turn on chunked HDF5 dataset creation */ + cl_opts->h5_use_chunks = 1; + { + const char *end = H5_optarg; + int j = 0; + + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + cl_opts->chk_size[j] = parse_size_directive(buf); + + j++; + + if (*end == '\0') + break; + + end++; + } + cl_opts->chk_rank = j; + } + + break; + + case 'D': { + const char *end = H5_optarg; + + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + if (HDstrlen(buf) > 1 || HDisdigit(buf[0])) { + size_t j; + + for (j = 0; j < 10 && buf[j] != '\0'; ++j) + if (!HDisdigit(buf[j])) { + HDfprintf(stderr, "sio_perf: invalid --debug option %s\n", buf); + HDexit(EXIT_FAILURE); + } + + sio_debug_level = atoi(buf); + + if (sio_debug_level > 4) + sio_debug_level = 4; + else if (sio_debug_level < 0) + sio_debug_level = 0; + } + else { + switch (*buf) { + case 'r': + /* Turn on raw data throughput info */ + cl_opts->print_raw = TRUE; + break; + case 't': + /* Turn on time printing */ + cl_opts->print_times = TRUE; + break; + case 'v': + /* Turn on verify data correctness*/ + cl_opts->verify = TRUE; + break; + default: + HDfprintf(stderr, "sio_perf: invalid --debug option %s\n", buf); + HDexit(EXIT_FAILURE); + } + } + + if (*end == '\0') + break; + + end++; + } + } + + break; + case 'e': { + const char *end = H5_optarg; + int j = 0; + + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + cl_opts->dset_size[j] = parse_size_directive(buf); + + j++; + + if (*end == '\0') + break; + + end++; + } + cl_opts->dset_rank = j; + } + + break; + + case 'i': + cl_opts->num_iters = HDatoi(H5_optarg); + break; + case 'o': + cl_opts->output_file = H5_optarg; + break; + case 'T': + cl_opts->h5_threshold = parse_size_directive(H5_optarg); + break; + case 'v': + if (!HDstrcasecmp(H5_optarg, "sec2")) { + cl_opts->vfd = sec2; + } + else if (!HDstrcasecmp(H5_optarg, "stdio")) { + cl_opts->vfd = stdio; + } + else if (!HDstrcasecmp(H5_optarg, "core")) { + cl_opts->vfd = core; + } + else if (!HDstrcasecmp(H5_optarg, "split")) { + cl_opts->vfd = split; + } + else if (!HDstrcasecmp(H5_optarg, "multi")) { + cl_opts->vfd = multi; + } + else if (!HDstrcasecmp(H5_optarg, "family")) { + cl_opts->vfd = family; + } + else if (!HDstrcasecmp(H5_optarg, "direct")) { + cl_opts->vfd = direct; + } + else { + HDfprintf(stderr, "sio_perf: invalid --api option %s\n", H5_optarg); + HDexit(EXIT_FAILURE); + } + break; + case 'w': + cl_opts->h5_write_only = TRUE; + break; + case 't': + cl_opts->h5_extendable = TRUE; + break; + case 'x': { + const char *end = H5_optarg; + int j = 0; + + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + cl_opts->buf_size[j] = parse_size_directive(buf); + + j++; + + if (*end == '\0') + break; + + end++; + } + cl_opts->buf_rank = j; + } + + break; + + case 'r': { + const char *end = H5_optarg; + int j = 0; + + while (end && *end != '\0') { + char buf[10]; + + HDmemset(buf, '\0', sizeof(buf)); + + for (i = 0; *end != '\0' && *end != ','; ++end) + if (HDisalnum(*end) && i < 10) + buf[i++] = *end; + + cl_opts->order[j] = (int)parse_size_directive(buf); + + j++; + + if (*end == '\0') + break; + + end++; + } + + cl_opts->order_rank = j; + } + + break; + + case 'h': + case '?': + default: + usage(progname); + HDfree(cl_opts); + return NULL; + } + } + + /* perform rank consistency analysis */ + actual_rank = 0; + + ranks[0] = cl_opts->dset_rank; + ranks[1] = cl_opts->buf_rank; + ranks[2] = cl_opts->order_rank; + ranks[3] = cl_opts->chk_rank; + + for (i = 0; i < 4; i++) { + if (ranks[i] > 0) { + if (!actual_rank) { + actual_rank = ranks[i]; + } + else { + if (actual_rank != ranks[i]) + exit(EXIT_FAILURE); + } + } + } + + if (!actual_rank) + actual_rank = default_rank; + + cl_opts->dset_rank = actual_rank; + cl_opts->buf_rank = actual_rank; + cl_opts->order_rank = actual_rank; + cl_opts->chk_rank = actual_rank; + + for (i = 0; i < actual_rank; i++) { + if (cl_opts->order[i] > actual_rank) { + exit(EXIT_FAILURE); + } + } + + /* set default if none specified yet */ + if (!cl_opts->io_types) + cl_opts->io_types = SIO_HDF5 | SIO_POSIX; /* run all API */ + + /* verify parameters sanity. Adjust if needed. */ + /* cap xfer_size with bytes per process */ + if (cl_opts->num_iters <= 0) + cl_opts->num_iters = 1; + + return cl_opts; +} + +/* + * Function: parse_size_directive + * Purpose: Parse the size directive passed on the commandline. The size + * directive is an integer followed by a size indicator: + * + * K, k - Kilobyte + * M, m - Megabyte + * G, g - Gigabyte + * + * Return: The size as a off_t because this is related to file size. + * If an unknown size indicator is used, then the program will + * exit with EXIT_FAILURE as the return value. + * Programmer: Bill Wendling, 18. December 2001 + * Modifications: + */ + +static hsize_t +parse_size_directive(const char *size) +{ + hsize_t s; + char * endptr; + + s = HDstrtoull(size, &endptr, 10); + + if (endptr && *endptr) { + while (*endptr != '\0' && (*endptr == ' ' || *endptr == '\t')) + ++endptr; + + switch (*endptr) { + case 'K': + case 'k': + s *= ONE_KB; + break; + + case 'M': + case 'm': + s *= ONE_MB; + break; + + case 'G': + case 'g': + s *= ONE_GB; + break; + + default: + HDfprintf(stderr, "Illegal size specifier '%c'\n", *endptr); + HDexit(EXIT_FAILURE); + } + } + + return s; +} + +/* + * Function: usage + * Purpose: Print a usage message and then exit. + * Return: Nothing + * Programmer: Bill Wendling, 31. October 2001 + * Modifications: + */ +static void +usage(const char *prog) +{ + print_version(prog); + HDprintf("usage: %s [OPTIONS]\n", prog); + HDprintf(" OPTIONS\n"); + HDprintf(" -h Print an usage message and exit\n"); + HDprintf(" -A AL Which APIs to test\n"); + HDprintf(" [default: all of them]\n"); + HDprintf(" -c SL Selects chunked storage and defines chunks dimensions\n"); + HDprintf(" and sizes\n"); + HDprintf(" [default: Off]\n"); + HDprintf(" -e SL Dimensions and sizes of dataset\n"); + HDprintf(" [default: 100,200]\n"); + HDprintf(" -i N Number of iterations to perform\n"); + HDprintf(" [default: 1]\n"); + HDprintf(" -r NL Dimension access order (see below for description)\n"); + HDprintf(" [default: 1,2]\n"); + HDprintf(" -t Selects extendable dimensions for HDF5 dataset\n"); + HDprintf(" [default: Off]\n"); + HDprintf(" -v VFD Selects file driver for HDF5 access\n"); + HDprintf(" [default: sec2]\n"); + HDprintf(" -w Perform write tests, not the read tests\n"); + HDprintf(" [default: Off]\n"); + HDprintf(" -x SL Dimensions and sizes of the transfer buffer\n"); + HDprintf(" [default: 10,20]\n"); + HDprintf("\n"); + HDprintf(" N - is an integer > 0.\n"); + HDprintf("\n"); + HDprintf(" S - is a size specifier, an integer > 0 followed by a size indicator:\n"); + HDprintf(" K - Kilobyte (%d)\n", ONE_KB); + HDprintf(" M - Megabyte (%d)\n", ONE_MB); + HDprintf(" G - Gigabyte (%d)\n", ONE_GB); + HDprintf("\n"); + HDprintf(" Example: '37M' is 37 megabytes or %d bytes\n", 37 * ONE_MB); + HDprintf("\n"); + HDprintf(" AL - is an API list. Valid values are:\n"); + HDprintf(" hdf5 - HDF5\n"); + HDprintf(" posix - POSIX\n"); + HDprintf("\n"); + HDprintf(" Example: -A posix,hdf5\n"); + HDprintf("\n"); + HDprintf(" NL - is list of integers (N) separated by commas.\n"); + HDprintf("\n"); + HDprintf(" Example: 1,2,3\n"); + HDprintf("\n"); + HDprintf(" SL - is list of size specifiers (S) separated by commas.\n"); + HDprintf("\n"); + HDprintf(" Example: 2K,2K,3K\n"); + HDprintf("\n"); + HDprintf(" The example defines an object (dataset, tranfer buffer) with three\n"); + HDprintf(" dimensions. Be aware that as the number of dimensions increases, the\n"); + HDprintf(" the total size of the object increases exponentially.\n"); + HDprintf("\n"); + HDprintf(" VFD - is an HDF5 file driver specifier. Valid values are:\n"); + HDprintf(" sec2, stdio, core, split, multi, family, direct\n"); + HDprintf("\n"); + HDprintf(" Dimension access order:\n"); + HDprintf(" Data access starts at the cardinal origin of the dataset using the\n"); + HDprintf(" transfer buffer. The next access occurs on a dataset region next to\n"); + HDprintf(" the previous one. For a multidimensional dataset, there are several\n"); + HDprintf(" directions as to where to proceed. This can be specified in the dimension\n"); + HDprintf(" access order. For example, -r 1,2 states that the tool should traverse\n"); + HDprintf(" dimension 1 first, and then dimension 2.\n"); + HDprintf("\n"); + HDprintf(" Environment variables:\n"); + HDprintf(" HDF5_NOCLEANUP Do not remove data files if set [default remove]\n"); + HDprintf(" HDF5_PREFIX Data file prefix\n"); + HDprintf("\n"); + HDfflush(stdout); +} /* end usage() */ diff --git a/tools/src/h5perf/sio_perf.h b/tools/src/h5perf/sio_perf.h new file mode 100644 index 0000000..d998377 --- /dev/null +++ b/tools/src/h5perf/sio_perf.h @@ -0,0 +1,104 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef SIO_PERF_H +#define SIO_PERF_H + +#ifndef STANDALONE +#include "io_timer.h" +#include "H5private.h" +#include "h5tools.h" +#include "h5tools_utils.h" +#else +#include "io_timer.h" +#include "sio_standalone.h" +#endif + +/* setup the dataset no fill option if this is v1.5 or more */ +#if H5_VERS_MAJOR > 1 || H5_VERS_MINOR > 4 +#define H5_HAVE_NOFILL 1 +#endif + +#define MAX_DIMS 32 + +typedef enum iotype_ { + POSIXIO, + HDF5 + /*NUM_TYPES*/ +} iotype; + +typedef enum vfdtype_ { + sec2, + stdio, + core, + split, + multi, + family, + direct + /*NUM_TYPES*/ +} vfdtype; + +typedef struct parameters_ { + iotype io_type; /* The type of IO test to perform */ + vfdtype vfd; + long num_files; /* Number of files to create */ + long num_dsets; /* Number of datasets to create */ + hsize_t num_bytes; /* Number of bytes in each dset */ + int num_iters; /* Number of times to loop doing the IO */ + int rank; /* Rank of dataset */ + hsize_t dset_size[MAX_DIMS]; /* Dataset size */ + size_t buf_size[MAX_DIMS]; /* Buffer size */ + size_t chk_size[MAX_DIMS]; /* Chunk size */ + int order[MAX_DIMS]; /* Buffer size */ + hsize_t h5_align; /* HDF5 object alignment */ + hsize_t h5_thresh; /* HDF5 object alignment threshold */ + int h5_use_chunks; /* Make HDF5 dataset chunked */ + int h5_extendable; /* Make HDF5 dataset chunked */ + int h5_write_only; /* Perform the write tests only */ + int verify; /* Verify data correctness */ + size_t page_size; + size_t page_buffer_size; +} parameters; + +typedef struct results_ { + herr_t ret_code; + io_time_t *timers; +} results; + +#ifndef SUCCESS +#define SUCCESS 0 +#endif /* !SUCCESS */ + +#ifndef FAIL +#define FAIL -1 +#endif /* !FAIL */ + +extern FILE * output; /* output file */ +extern io_time_t *timer_g; /* timer: global for stub functions */ +extern int sio_debug_level; /* The debug level: + * 0 - Off + * 1 - Minimal + * 2 - Some more + * 3 - Maximal + * 4 - Even More Debugging (timer stuff) + */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +extern void do_sio(parameters param, results *res); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* SIO_PERF_H */ |