diff options
Diffstat (limited to 'tools/src/h5perf/pio_engine.c')
-rw-r--r-- | tools/src/h5perf/pio_engine.c | 2745 |
1 files changed, 2745 insertions, 0 deletions
diff --git a/tools/src/h5perf/pio_engine.c b/tools/src/h5perf/pio_engine.c new file mode 100644 index 0000000..cac36d7 --- /dev/null +++ b/tools/src/h5perf/pio_engine.c @@ -0,0 +1,2745 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Author: Albert Cheng of NCSA, Oct 24, 2001. + */ + +#include "hdf5.h" + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +#ifdef H5_HAVE_UNISTD_H +#include <sys/types.h> +#include <unistd.h> +#endif + +#ifdef H5_HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +#ifdef H5_HAVE_PARALLEL + +#include <mpi.h> + +#ifndef MPI_FILE_NULL /*MPIO may be defined in mpi.h already */ +#include <mpio.h> +#endif /* !MPI_FILE_NULL */ + +#include "pio_perf.h" + +/* Macro definitions */ + +#if H5_VERS_MAJOR == 1 && H5_VERS_MINOR == 6 +#define H5DCREATE(fd, name, type, space, dcpl) H5Dcreate(fd, name, type, space, dcpl) +#define H5DOPEN(fd, name) H5Dopen(fd, name) +#else +#define H5DCREATE(fd, name, type, space, dcpl) \ + H5Dcreate2(fd, name, type, space, H5P_DEFAULT, dcpl, H5P_DEFAULT) +#define H5DOPEN(fd, name) H5Dopen2(fd, name, H5P_DEFAULT) +#endif + +/* sizes of various items. these sizes won't change during program execution */ +/* The following three must have the same type */ +#define ELMT_H5_TYPE H5T_NATIVE_UCHAR + +#define GOTOERROR(errcode) \ + { \ + ret_code = errcode; \ + goto done; \ + } +#define ERRMSG(mesg) \ + { \ + HDfprintf(stderr, "Proc %d: ", pio_mpi_rank_g); \ + HDfprintf(stderr, "*** Assertion failed (%s) at line %4d in %s\n", mesg, (int)__LINE__, __FILE__); \ + } + +/* verify: if val is false (0), print mesg. */ +#define VRFY(val, mesg) \ + do { \ + if (!val) { \ + ERRMSG(mesg); \ + GOTOERROR(FAIL); \ + } \ + } while (0) + +/* POSIX I/O macros */ +#ifdef H5_HAVE_WIN32_API +/* Can't link against the library, so this test will use the older, non-Unicode + * _open() call on Windows. + */ +#define HDopen(S, F, ...) _open(S, F | _O_BINARY, __VA_ARGS__) +#endif /* H5_HAVE_WIN32_API */ +#define POSIXCREATE(fn) HDopen(fn, O_CREAT | O_TRUNC | O_RDWR, 0600) +#define POSIXOPEN(fn, F) HDopen(fn, F, 0600) +#define POSIXCLOSE(F) HDclose(F) +#define POSIXSEEK(F, L) HDlseek(F, L, SEEK_SET) +#define POSIXWRITE(F, B, S) HDwrite(F, B, S) +#define POSIXREAD(F, B, S) HDread(F, B, S) + +enum { PIO_CREATE = 1, PIO_WRITE = 2, PIO_READ = 4 }; + +/* Global variables */ +static int clean_file_g = -1; /*whether to cleanup temporary test */ +/*files. -1 is not defined; */ +/*0 is no cleanup; 1 is do cleanup */ + +/* + * In a parallel machine, the filesystem suitable for compiling is + * unlikely a parallel file system that is suitable for parallel I/O. + * There is no standard pathname for the parallel file system. /tmp + * is about the best guess. + */ +#ifndef HDF5_PARAPREFIX +#define HDF5_PARAPREFIX "" +#endif /* !HDF5_PARAPREFIX */ + +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif /* !MIN */ + +/* the different types of file descriptors we can expect */ +typedef union _file_descr { + int posixfd; /* POSIX file handle*/ + MPI_File mpifd; /* MPI file */ + hid_t h5fd; /* HDF5 file */ +} file_descr; + +/* local functions */ +static char * pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size); +static herr_t do_write(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nelmts, + size_t buf_size, void *buffer); +static herr_t do_read(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nelmts, + size_t buf_size, void *buffer /*out*/); +static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); +static herr_t do_fclose(iotype iot, file_descr *fd); +static void do_cleanupfile(iotype iot, char *fname); +static off_t sqrto(off_t); + +/* + * Function: do_pio + * Purpose: PIO Engine where Parallel IO are executed. + * Return: results + * Programmer: Albert Cheng, Bill Wendling 2001/12/12 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +results +do_pio(parameters param) +{ + /* return codes */ + herr_t ret_code = 0; /*return code */ + results res; + + file_descr fd; + iotype iot; + + char fname[FILENAME_MAX]; + long nf; + long ndsets; + off_t nbytes; /*number of bytes per dataset */ + off_t snbytes; /*general dataset size */ + /*for 1D, it is the actual dataset size */ + /*for 2D, it is the size of a side of the dataset square */ + char * buffer = NULL; /*data buffer pointer */ + size_t buf_size; /*general buffer size in bytes */ + /*for 1D, it is the actual buffer size */ + /*for 2D, it is the length of the buffer rectangle */ + size_t blk_size; /*data block size in bytes */ + size_t bsize; /*actual buffer size */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + + /* Sanity check parameters */ + + /* IO type */ + iot = param.io_type; + + switch (iot) { + case MPIO: + fd.mpifd = MPI_FILE_NULL; + res.timers = io_time_new(MPI_CLOCK); + break; + case POSIXIO: + fd.posixfd = -1; + res.timers = io_time_new(MPI_CLOCK); + break; + case PHDF5: + fd.h5fd = -1; + res.timers = io_time_new(MPI_CLOCK); + break; + default: + /* unknown request */ + HDfprintf(stderr, "Unknown IO type request (%d)\n", iot); + GOTOERROR(FAIL); + } + + ndsets = param.num_dsets; /* number of datasets per file */ + nbytes = param.num_bytes; /* number of bytes per dataset */ + buf_size = param.buf_size; + blk_size = param.blk_size; + + if (!param.dim2d) { + snbytes = nbytes; /* General dataset size */ + bsize = buf_size; /* Actual buffer size */ + } + else { + snbytes = sqrto(nbytes); /* General dataset size */ + bsize = buf_size * blk_size; /* Actual buffer size */ + } + + if (param.num_files < 0) { + HDfprintf(stderr, "number of files must be >= 0 (%ld)\n", param.num_files); + GOTOERROR(FAIL); + } + + if (ndsets < 0) { + HDfprintf(stderr, "number of datasets per file must be >= 0 (%ld)\n", ndsets); + GOTOERROR(FAIL); + } + + if (param.num_procs <= 0) { + HDfprintf(stderr, "maximum number of process to use must be > 0 (%d)\n", param.num_procs); + GOTOERROR(FAIL); + } + + /* Validate transfer buffer size & block size*/ + if (blk_size <= 0) { + HDfprintf(stderr, "Transfer block size (%zu) must be > 0\n", blk_size); + GOTOERROR(FAIL); + } + if (buf_size <= 0) { + HDfprintf(stderr, "Transfer buffer size (%zu) must be > 0\n", buf_size); + GOTOERROR(FAIL); + } + if ((buf_size % blk_size) != 0) { + HDfprintf(stderr, + "Transfer buffer size (%zu) must be a multiple of the " + "interleaved I/O block size (%zu)\n", + buf_size, blk_size); + GOTOERROR(FAIL); + } + if ((snbytes % pio_mpi_nprocs_g) != 0) { + HDfprintf(stderr, + "Dataset size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "number of processes (%d)\n", + (long long)snbytes, pio_mpi_nprocs_g); + GOTOERROR(FAIL); + } + + if (!param.dim2d) { + if (((size_t)(snbytes / pio_mpi_nprocs_g) % buf_size) != 0) { + HDfprintf(stderr, + "Dataset size/process (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "trasfer buffer size (%zu)\n", + (long long)(snbytes / pio_mpi_nprocs_g), buf_size); + GOTOERROR(FAIL); + } + } + else { + if (((size_t)snbytes % buf_size) != 0) { + HDfprintf(stderr, + "Dataset side size (%" H5_PRINTF_LL_WIDTH "d) must be a multiple of the " + "trasfer buffer size (%zu)\n", + (long long)snbytes, buf_size); + GOTOERROR(FAIL); + } + } + + /* Allocate transfer buffer */ + if ((buffer = malloc(bsize)) == NULL) { + HDfprintf(stderr, "malloc for transfer buffer size (%zu) failed\n", bsize); + GOTOERROR(FAIL); + } + + if (pio_debug_level >= 4) { + int myrank; + + MPI_Comm_rank(pio_comm_g, &myrank); + + /* output all of the times for all iterations */ + if (myrank == 0) + HDfprintf(output, "Timer details:\n"); + } + + for (nf = 1; nf <= param.num_files; nf++) { + /* + * Write performance measurement + */ + /* Open file for write */ + char base_name[256]; + + HDsprintf(base_name, "#pio_tmp_%lu", nf); + pio_create_filename(iot, base_name, fname, sizeof(fname)); + if (pio_debug_level > 0) + HDfprintf(output, "rank %d: data filename=%s\n", pio_mpi_rank_g, fname); + + /* Need barrier to make sure everyone starts at the same time */ + MPI_Barrier(pio_comm_g); + + io_time_set(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, PIO_CREATE | PIO_WRITE); + + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTART); + hrc = do_write(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); + io_time_set(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, TSTOP); + + VRFY((hrc == SUCCESS), "do_write failed"); + + /* Close file for write */ + hrc = do_fclose(iot, &fd); + + io_time_set(res.timers, HDF5_GROSS_WRITE_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + + if (!param.h5_write_only) { + /* + * Read performance measurement + */ + /* Need barrier to make sure everyone is done writing and has + * closed the file. Also to make sure everyone starts reading + * at the same time. + */ + MPI_Barrier(pio_comm_g); + + /* Open file for read */ + io_time_set(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTART); + hrc = do_fopen(¶m, fname, &fd, PIO_READ); + + VRFY((hrc == SUCCESS), "do_fopen failed"); + + io_time_set(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTART); + hrc = do_read(&res, &fd, ¶m, ndsets, nbytes, buf_size, buffer); + io_time_set(res.timers, HDF5_FINE_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_read failed"); + + /* Close file for read */ + hrc = do_fclose(iot, &fd); + + io_time_set(res.timers, HDF5_GROSS_READ_FIXED_DIMS, TSTOP); + VRFY((hrc == SUCCESS), "do_fclose failed"); + } + + /* Need barrier to make sure everyone is done with the file */ + /* before it may be removed by do_cleanupfile */ + MPI_Barrier(pio_comm_g); + do_cleanupfile(iot, fname); + } + +done: + /* clean up */ + /* release HDF5 objects */ + + /* close any opened files */ + /* no remove(fname) because that should have happened normally. */ + switch (iot) { + case POSIXIO: + if (fd.posixfd != -1) + hrc = do_fclose(iot, &fd); + break; + case MPIO: + if (fd.mpifd != MPI_FILE_NULL) + hrc = do_fclose(iot, &fd); + break; + case PHDF5: + if (fd.h5fd != -1) + hrc = do_fclose(iot, &fd); + break; + default: + break; + } + + /* release generic resources */ + if (buffer) + HDfree(buffer); + res.ret_code = ret_code; + return res; +} + +/* + * Function: pio_create_filename + * Purpose: Create a new filename to write to. Determine the correct + * suffix to append to the filename by the type of I/O we're + * doing. Also, place in the /tmp/{$USER,$LOGIN} directory if + * USER or LOGIN are specified in the environment. + * Return: Pointer to filename or NULL + * Programmer: Bill Wendling, 21. November 2001 + * Modifications: + */ +static char * +pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size) +{ + const char *prefix, *suffix = ""; + char * ptr, last = '\0'; + size_t i, j; + + if (!base_name || !fullname || size < 1) + return NULL; + + HDmemset(fullname, 0, size); + + switch (iot) { + case POSIXIO: + suffix = ".posix"; + break; + case MPIO: + suffix = ".mpio"; + break; + case PHDF5: + suffix = ".h5"; + break; + default: + break; + } + + /* First use the environment variable and then try the constant */ + prefix = HDgetenv("HDF5_PARAPREFIX"); + +#ifdef HDF5_PARAPREFIX + if (!prefix) + prefix = HDF5_PARAPREFIX; +#endif /* HDF5_PARAPREFIX */ + + /* Prepend the prefix value to the base name */ + if (prefix && *prefix) { + /* If the prefix specifies the HDF5_PARAPREFIX directory, then + * default to using the "/tmp/$USER" or "/tmp/$LOGIN" + * directory instead. */ + register char *user, *login, *subdir; + + user = HDgetenv("USER"); + login = HDgetenv("LOGIN"); + subdir = (user ? user : login); + + if (subdir) { + for (i = 0; i < size - 1 && prefix[i]; i++) + fullname[i] = prefix[i]; + + fullname[i++] = '/'; + + for (j = 0; i < size && subdir[j]; i++, j++) + fullname[i] = subdir[j]; + } + else { + /* We didn't append the prefix yet */ + HDstrncpy(fullname, prefix, size); + fullname[size - 1] = '\0'; + } + + if ((HDstrlen(fullname) + HDstrlen(base_name) + 1) < size) { + /* Append the base_name with a slash first. Multiple slashes are + * handled below. */ + h5_stat_t buf; + + if (HDstat(fullname, &buf) < 0) + /* The directory doesn't exist just yet */ + if (HDmkdir(fullname, (mode_t)0755) < 0 && errno != EEXIST) { + /* We couldn't make the "/tmp/${USER,LOGIN}" subdirectory. + * Default to PREFIX's original prefix value. */ + HDstrcpy(fullname, prefix); + } + + HDstrcat(fullname, "/"); + HDstrcat(fullname, base_name); + } + else { + /* Buffer is too small */ + return NULL; + } + } + else if (HDstrlen(base_name) >= size) { + /* Buffer is too small */ + return NULL; + } + else { + HDstrcpy(fullname, base_name); + } + + /* Append a suffix */ + if (suffix) { + if (HDstrlen(fullname) + HDstrlen(suffix) >= size) + return NULL; + + HDstrcat(fullname, suffix); + } + + /* Remove any double slashes in the filename */ + for (ptr = fullname, i = j = 0; ptr && i < size; i++, ptr++) { + if (*ptr != '/' || last != '/') + fullname[j++] = *ptr; + + last = *ptr; + } + + return fullname; +} + +/* + * Function: do_write + * Purpose: Write the required amount of data to the file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static herr_t +do_write(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nbytes, size_t buf_size, + void *buffer) +{ + int ret_code = SUCCESS; + int rc; /*routine return code */ + long ndset; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ + char dname[64]; + off_t dset_offset = 0; /*dataset offset in a file */ + off_t bytes_begin[2]; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + off_t snbytes = 0; /*size of a side of the dataset square */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* File offset of the next transfer */ + off_t file_offset_advance; /* File offset advance after each I/O operation */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset; /* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */ + MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */ + MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */ + MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */ + MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */ + MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */ + MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */ + MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */ + MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */ + MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[2]; /*dataset dim sizes */ + hid_t h5dset_space_id = H5I_INVALID_HID; /*dataset space ID */ + hid_t h5mem_space_id = H5I_INVALID_HID; /*memory dataspace ID */ + hid_t h5ds_id = H5I_INVALID_HID; /*dataset handle */ + hsize_t h5block[2]; /*dataspace selection */ + hsize_t h5stride[2]; + hsize_t h5count[2]; + hsize_t h5start[2]; + hssize_t h5offset[2]; /* Selection offset within dataspace */ + hid_t h5dcpl = H5I_INVALID_HID; /* Dataset creation property list */ + hid_t h5dxpl = H5I_INVALID_HID; /* Dataset transfer property list */ + + /* Get the parameters from the parameter block */ + blk_size = parms->blk_size; + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = (off_t)(blk_size * (size_t)pio_mpi_rank_g); + } /* end else */ + + /* Prepare buffer for verifying data */ + if (parms->verify) + memset(buffer, pio_mpi_rank_g + 1, buf_size); + } /* end if */ + /* 2D dataspace */ + else { + /* nbytes is always the number of bytes per dataset (1D or 2D). If the + dataspace is 2D, snbytes is the size of a side of the dataset square. + */ + snbytes = sqrto(nbytes); + + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)((double)snbytes * pio_mpi_rank_g / pio_mpi_nprocs_g); + bytes_begin[1] = 0; + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = 0; + + if (!parms->h5_use_chunks || parms->io_type == PHDF5) + bytes_begin[1] = (off_t)(blk_size * (size_t)pio_mpi_rank_g); + else + bytes_begin[1] = (off_t)(blk_size * blk_size * (size_t)pio_mpi_rank_g); + } /* end else */ + + /* Prepare buffer for verifying data */ + if (parms->verify) + HDmemset(buffer, pio_mpi_rank_g + 1, buf_size * blk_size); + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes * (pio_mpi_rank_g + 1)) / pio_mpi_nprocs_g) - + (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + if (!parms->dim2d) { + HDfprintf(output, + "Debug(do_write): " + "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH + "d\n", + buf_size, (long long)bytes_begin[0], (long long)bytes_count); + } + else { + HDfprintf(output, + "Debug(do_write): " + "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH + "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n", + buf_size * blk_size, (long long)bytes_begin[0], (long long)bytes_begin[1], + (long long)bytes_count); + } + } + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)pio_mpi_nprocs_g, mpi_blk_type, + &mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end if */ + /* 2D dataspace */ + else { + /* Build partial buffer derived type for contiguous access */ + + mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE, &mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build contiguous file's derived type */ + mrc = MPI_Type_vector((int)blk_size, (int)1, (int)((size_t)snbytes / buf_size), + mpi_partial_buffer_cont, &mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit contiguous file type */ + mrc = MPI_Type_commit(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build partial buffer derived type for interleaved access */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build interleaved file's derived type */ + mrc = MPI_Type_vector((int)buf_size, (int)1, (int)((size_t)snbytes / blk_size), + mpi_partial_buffer_inter, &mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit interleaved file type */ + mrc = MPI_Type_commit(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full buffer derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * buf_size), MPI_BYTE, &mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full buffer derived type */ + mrc = MPI_Type_commit(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full chunk derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * blk_size), MPI_BYTE, &mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full chunk derived type */ + mrc = MPI_Type_commit(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build chunk interleaved file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)((size_t)snbytes / blk_size), + mpi_full_chunk, &mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit chunk interleaved file type */ + mrc = MPI_Type_commit(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + } /* end else */ + break; + + case PHDF5: /* HDF5 setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)snbytes; + h5dims[1] = (hsize_t)snbytes; + h5dset_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = 1; + h5stride[1] = h5block[0] = h5block[1] = blk_size; + h5count[0] = 1; + h5count[1] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = blk_size; + h5stride[1] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = h5block[1] = blk_size; + h5count[0] = buf_size / blk_size; + h5count[1] = 1; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + if (!parms->interleaved) { + h5dims[0] = blk_size; + h5dims[1] = buf_size; + } + else { + h5dims[0] = buf_size; + h5dims[1] = blk_size; + } + h5mem_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if (parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + break; + + default: + break; + } /* end switch */ + + for (ndset = 1; ndset <= ndsets; ++ndset) { + + /* Calculate dataset offset within a file */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + case MPIO: + /* both posix and mpi io just need dataset offset in file*/ + dset_offset = (ndset - 1) * nbytes; + break; + + case PHDF5: + h5dcpl = H5Pcreate(H5P_DATASET_CREATE); + if (h5dcpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + /* 1D dataspace */ + if (!parms->dim2d) { + /* Make the dataset chunked if asked */ + if (parms->h5_use_chunks) { + /* Set the chunk size to be the same as the buffer size */ + h5dims[0] = blk_size; + hrc = H5Pset_chunk(h5dcpl, 1, h5dims); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + } /* end if */ + else { + /* 2D dataspace */ + if (parms->h5_use_chunks) { + /* Set the chunk size to be the same as the block size */ + h5dims[0] = blk_size; + h5dims[1] = blk_size; + hrc = H5Pset_chunk(h5dcpl, 2, h5dims); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + } /* end else */ + + HDsprintf(dname, "Dataset_%ld", ndset); + h5ds_id = H5DCREATE(fd->h5fd, dname, ELMT_H5_TYPE, h5dset_space_id, h5dcpl); + + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset Create failed\n"); + GOTOERROR(FAIL); + } + + hrc = H5Pclose(h5dcpl); + /* verifying the close of the dcpl */ + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + break; + } + + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin + * fashion, according to number of process. Otherwise, select + * all bytes_count in contiguous. + */ + nbytes_xfer = 0; + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]); + } /* end if */ + else { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]); + } /* end else */ + + /* Start "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTART); + + while (nbytes_xfer < bytes_count) { + /* Write */ + /* Calculate offset of write within a dataset/file */ + switch (parms->io_type) { + case POSIXIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)buf_size == POSIXWRITE(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + (off_t)(nbytes_xfer * pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)blk_size == POSIXWRITE(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / blk_size) % (size_t)snbytes)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute file offset */ + file_offset = + posix_file_offset + + (off_t)(((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = 0; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute file offset */ + /* Before simplification */ + /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size + *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size)) + *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */ + + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */ + file_offset_advance = (off_t)snbytes * (off_t)blk_size; + } /* end else */ + } /* end else */ + + /* Common code for file access */ + + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to write */ + while (nbytes_toxfer > 0) { + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are written */ + rc = ((ssize_t)nbytes_xfer_advance == + POSIXWRITE(fd->posixfd, buf_p, nbytes_xfer_advance)); + VRFY((rc != 0), "POSIXWRITE"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance file offset */ + file_offset += file_offset_advance; + } /* end while */ + + } /* end else */ + + break; + + case MPIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Independent file access */ + if (!parms->collective) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent write */ + mrc = + MPI_File_write_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to write */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p, (int)1, + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* Collective file access */ + else { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent write */ + mrc = MPI_File_write_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, mpi_file_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform write */ + mrc = MPI_File_write_at_all(fd->mpifd, 0, buffer, (int)(buf_size / blk_size), + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)buf_size; + } /* end else */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes)) + + (MPI_Offset)(((size_t)nbytes_xfer / blk_size) % (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_cont_type; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)( + ((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)( + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_inter_type; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = 0; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_full_buffer; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute offset in file */ + /* Before simplification */ + /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))* + (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+ + ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes + /blk_size*(blk_size*blk_size)); */ + mpi_offset = mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */ + mpi_offset_advance = (MPI_Offset)((size_t)snbytes * blk_size); + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_chunk_inter_type; + } /* end else */ + } /* end else */ + + /* Common code for independent file access */ + if (!parms->collective) { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to write */ + while (nbytes_toxfer > 0) { + /* Perform independent write */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buf_p, + (int)nbytes_xfer_advance, MPI_BYTE, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (ssize_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance global offset in dataset */ + mpi_offset += mpi_offset_advance; + } /* end while */ + } /* end if */ + + /* Common code for collective file access */ + else { + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE, mpi_collective_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform write */ + MPI_File_write_at_all(fd->mpifd, 0, buffer, (int)(buf_size * blk_size), MPI_BYTE, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_WRITE"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + } /* end else */ + + } /* end else */ + + break; + + case PHDF5: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer * pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = + H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dwrite"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (ssize_t)buf_size; + } /* end if */ + /* 2D dataspace */ + else { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = + (hssize_t)(((size_t)nbytes_xfer / ((size_t)snbytes * blk_size)) * blk_size); + h5offset[1] = + (hssize_t)(((size_t)nbytes_xfer % ((size_t)snbytes * blk_size)) / blk_size); + + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * buf_size)) * + buf_size); + h5offset[1] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * buf_size)) / + buf_size); + + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = + H5Dwrite(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dwrite"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + + } /* end else */ + + break; + + default: + break; + } /* switch (parms->io_type) */ + } /* end while */ + + /* Stop "raw data" write timer */ + io_time_set(res->timers, HDF5_RAW_WRITE_FIXED_DIMS, TSTOP); + + /* Calculate write time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == PHDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + } /* end for */ + +done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* 1D dataspace */ + if (!parms->dim2d) { + /* Free file type */ + mrc = MPI_Type_free(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* 2D dataspace */ + else { + /* Free partial buffer type for contiguous access */ + mrc = MPI_Type_free(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free contiguous file type */ + mrc = MPI_Type_free(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free partial buffer type for interleaved access */ + mrc = MPI_Type_free(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free interleaved file type */ + mrc = MPI_Type_free(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full buffer type */ + mrc = MPI_Type_free(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full chunk type */ + mrc = MPI_Type_free(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free chunk interleaved file type */ + mrc = MPI_Type_free(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end else */ + } /* end if */ + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + return ret_code; +} + +static off_t +sqrto(off_t x) +{ + double root_x = sqrt((double)x); + return (off_t)root_x; +} + +/* + * Function: do_read + * Purpose: read the required amount of data from the file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng 2001/12/13 + * Modifications: + * Added 2D testing (Christian Chilan, 10. August 2005) + */ +static herr_t +do_read(results *res, file_descr *fd, parameters *parms, long ndsets, off_t nbytes, size_t buf_size, + void *buffer /*out*/) +{ + int ret_code = SUCCESS; + int rc; /*routine return code */ + long ndset; + size_t blk_size; /* The block size to subdivide the xfer buffer into */ + size_t bsize; /* Size of the actual buffer */ + off_t nbytes_xfer; /* Total number of bytes transferred so far */ + size_t nbytes_xfer_advance; /* Number of bytes transferred in a single I/O operation */ + size_t nbytes_toxfer; /* Number of bytes to transfer a particular time */ + char dname[64]; + off_t dset_offset = 0; /*dataset offset in a file */ + off_t bytes_begin[2]; /*first elmt this process transfer */ + off_t bytes_count; /*number of elmts this process transfer */ + off_t snbytes = 0; /*size of a side of the dataset square */ + unsigned char *buf_p; /* Current buffer pointer */ + + /* POSIX variables */ + off_t file_offset; /* File offset of the next transfer */ + off_t file_offset_advance; /* File offset advance after each I/O operation */ + off_t posix_file_offset; /* Base file offset of the next transfer */ + + /* MPI variables */ + MPI_Offset mpi_file_offset; /* Base file offset of the next transfer*/ + MPI_Offset mpi_offset; /* Offset in MPI file */ + MPI_Offset mpi_offset_advance; /* Offset advance after each I/O operation */ + MPI_Datatype mpi_file_type; /* MPI derived type for 1D file */ + MPI_Datatype mpi_blk_type; /* MPI derived type for 1D buffer */ + MPI_Datatype mpi_cont_type; /* MPI derived type for 2D contiguous file */ + MPI_Datatype mpi_partial_buffer_cont; /* MPI derived type for partial 2D contiguous buffer */ + MPI_Datatype mpi_inter_type; /* MPI derived type for 2D interleaved file */ + MPI_Datatype mpi_partial_buffer_inter; /* MPI derived type for partial 2D interleaved buffer */ + MPI_Datatype mpi_full_buffer; /* MPI derived type for 2D full buffer */ + MPI_Datatype mpi_full_chunk; /* MPI derived type for 2D full chunk */ + MPI_Datatype mpi_chunk_inter_type; /* MPI derived type for 2D chunk interleaved file */ + MPI_Datatype mpi_collective_type; /* Generic MPI derived type for 2D collective access */ + MPI_Status mpi_status; + int mrc; /* MPI return code */ + + /* HDF5 variables */ + herr_t hrc; /*HDF5 return code */ + hsize_t h5dims[2]; /*dataset dim sizes */ + hid_t h5dset_space_id = H5I_INVALID_HID; /*dataset space ID */ + hid_t h5mem_space_id = H5I_INVALID_HID; /*memory dataspace ID */ + hid_t h5ds_id = H5I_INVALID_HID; /*dataset handle */ + hsize_t h5block[2]; /*dataspace selection */ + hsize_t h5stride[2]; + hsize_t h5count[2]; + hsize_t h5start[2]; + hssize_t h5offset[2]; /* Selection offset within dataspace */ + hid_t h5dxpl = H5I_INVALID_HID; /* Dataset transfer property list */ + + /* Get the parameters from the parameter block */ + blk_size = parms->blk_size; + + /* There are two kinds of transfer patterns, contiguous and interleaved. + * Let 0,1,2,...,n be data accessed by process 0,1,2,...,n + * where n is rank of the last process. + * In contiguous pattern, data are accessed as + * 000...111...222...nnn... + * In interleaved pattern, data are accessed as + * 012...n012...n... + * These are all in the scope of one dataset. + */ + + /* 1D dataspace */ + if (!parms->dim2d) { + bsize = buf_size; + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = (off_t)blk_size * (off_t)pio_mpi_rank_g; + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* nbytes is always the number of bytes per dataset (1D or 2D). If the + dataspace is 2D, snbytes is the size of a side of the 'dataset square'. + */ + snbytes = sqrto(nbytes); + + bsize = buf_size * blk_size; + + /* Contiguous Pattern: */ + if (!parms->interleaved) { + bytes_begin[0] = (off_t)((double)snbytes * pio_mpi_rank_g / pio_mpi_nprocs_g); + bytes_begin[1] = 0; + } /* end if */ + /* Interleaved Pattern: */ + else { + bytes_begin[0] = 0; + + if (!parms->h5_use_chunks || parms->io_type == PHDF5) + bytes_begin[1] = (off_t)blk_size * (off_t)pio_mpi_rank_g; + else + bytes_begin[1] = (off_t)blk_size * (off_t)blk_size * (off_t)pio_mpi_rank_g; + } /* end else */ + } /* end else */ + + /* Calculate the total number of bytes (bytes_count) to be + * transferred by this process. It may be different for different + * transfer pattern due to rounding to integral values. + */ + /* + * Calculate the beginning bytes of this process and the next. + * bytes_count is the difference between these two beginnings. + * This way, it eliminates any rounding errors. + * (This is tricky, don't mess with the formula, rounding errors + * can easily get introduced) */ + bytes_count = (off_t)(((double)nbytes * (pio_mpi_rank_g + 1)) / pio_mpi_nprocs_g) - + (off_t)(((double)nbytes * pio_mpi_rank_g) / pio_mpi_nprocs_g); + + /* debug */ + if (pio_debug_level >= 4) { + HDprint_rank(output); + if (!parms->dim2d) { + HDfprintf(output, + "Debug(do_write): " + "buf_size=%zu, bytes_begin=%" H5_PRINTF_LL_WIDTH "d, bytes_count=%" H5_PRINTF_LL_WIDTH + "d\n", + buf_size, (long long)bytes_begin[0], (long long)bytes_count); + } + else { + HDfprintf(output, + "Debug(do_write): " + "linear buf_size=%zu, bytes_begin=(%" H5_PRINTF_LL_WIDTH "d,%" H5_PRINTF_LL_WIDTH + "d), bytes_count=%" H5_PRINTF_LL_WIDTH "d\n", + buf_size * blk_size, (long long)bytes_begin[0], (long long)bytes_begin[1], + (long long)bytes_count); + } + } + + /* I/O Access specific setup */ + switch (parms->io_type) { + case POSIXIO: + /* No extra setup */ + break; + + case MPIO: /* MPI-I/O setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + /* Build block's derived type */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Build file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)pio_mpi_nprocs_g, mpi_blk_type, + &mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit file type */ + mrc = MPI_Type_commit(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Commit buffer type */ + mrc = MPI_Type_commit(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end if */ + /* 2D dataspace */ + else { + /* Build partial buffer derived type for contiguous access */ + mrc = MPI_Type_contiguous((int)buf_size, MPI_BYTE, &mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build contiguous file's derived type */ + mrc = MPI_Type_vector((int)blk_size, (int)1, (int)((size_t)snbytes / buf_size), + mpi_partial_buffer_cont, &mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit contiguous file type */ + mrc = MPI_Type_commit(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build partial buffer derived type for interleaved access */ + mrc = MPI_Type_contiguous((int)blk_size, MPI_BYTE, &mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit partial buffer derived type */ + mrc = MPI_Type_commit(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build interleaved file's derived type */ + mrc = MPI_Type_vector((int)buf_size, (int)1, (int)((size_t)snbytes / blk_size), + mpi_partial_buffer_inter, &mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit interleaved file type */ + mrc = MPI_Type_commit(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full buffer derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * buf_size), MPI_BYTE, &mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full buffer derived type */ + mrc = MPI_Type_commit(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build full chunk derived type */ + mrc = MPI_Type_contiguous((int)(blk_size * blk_size), MPI_BYTE, &mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit full chunk derived type */ + mrc = MPI_Type_commit(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + + /* Build chunk interleaved file's derived type */ + mrc = MPI_Type_vector((int)(buf_size / blk_size), (int)1, (int)((size_t)snbytes / blk_size), + mpi_full_chunk, &mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_CREATE"); + + /* Commit chunk interleaved file type */ + mrc = MPI_Type_commit(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_COMMIT"); + } /* end else */ + break; + + case PHDF5: /* HDF5 setup */ + /* 1D dataspace */ + if (!parms->dim2d) { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)nbytes; + h5dset_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5stride[0] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = blk_size; + h5count[0] = buf_size / blk_size; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + h5dims[0] = buf_size; + h5mem_space_id = H5Screate_simple(1, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + if (nbytes > 0) { + /* define a contiguous dataset of nbytes native bytes */ + h5dims[0] = (hsize_t)snbytes; + h5dims[1] = (hsize_t)snbytes; + h5dset_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5dset_space_id >= 0), "H5Screate_simple"); + + /* Set up the file dset space id to select the pattern to access */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = 1; + h5stride[1] = h5block[0] = h5block[1] = blk_size; + h5count[0] = 1; + h5count[1] = buf_size / blk_size; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5start[0] = (hsize_t)bytes_begin[0]; + h5start[1] = (hsize_t)bytes_begin[1]; + h5stride[0] = blk_size; + h5stride[1] = blk_size * (size_t)pio_mpi_nprocs_g; + h5block[0] = h5block[1] = blk_size; + h5count[0] = buf_size / blk_size; + h5count[1] = 1; + } /* end else */ + hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, h5start, h5stride, h5count, + h5block); + VRFY((hrc >= 0), "H5Sselect_hyperslab"); + } /* end if */ + else { + h5dset_space_id = H5Screate(H5S_SCALAR); + VRFY((h5dset_space_id >= 0), "H5Screate"); + } /* end else */ + + /* Create the memory dataspace that corresponds to the xfer buffer */ + if (buf_size > 0) { + if (!parms->interleaved) { + h5dims[0] = blk_size; + h5dims[1] = buf_size; + } + else { + h5dims[0] = buf_size; + h5dims[1] = blk_size; + } + h5mem_space_id = H5Screate_simple(2, h5dims, NULL); + VRFY((h5mem_space_id >= 0), "H5Screate_simple"); + } /* end if */ + else { + h5mem_space_id = H5Screate(H5S_SCALAR); + VRFY((h5mem_space_id >= 0), "H5Screate"); + } /* end else */ + } /* end else */ + + /* Create the dataset transfer property list */ + h5dxpl = H5Pcreate(H5P_DATASET_XFER); + if (h5dxpl < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Change to collective I/O, if asked */ + if (parms->collective) { + hrc = H5Pset_dxpl_mpio(h5dxpl, H5FD_MPIO_COLLECTIVE); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } /* end if */ + } /* end if */ + break; + + default: + break; + } /* end switch */ + + for (ndset = 1; ndset <= ndsets; ++ndset) { + + /* Calculate dataset offset within a file */ + + /* create dataset */ + switch (parms->io_type) { + case POSIXIO: + case MPIO: + /* both posix and mpi io just need dataset offset in file*/ + dset_offset = (ndset - 1) * nbytes; + break; + + case PHDF5: + HDsprintf(dname, "Dataset_%ld", ndset); + h5ds_id = H5DOPEN(fd->h5fd, dname); + if (h5ds_id < 0) { + HDfprintf(stderr, "HDF5 Dataset open failed\n"); + GOTOERROR(FAIL); + } + break; + + default: + break; + } + + /* The task is to transfer bytes_count bytes, starting at + * bytes_begin position, using transfer buffer of buf_size bytes. + * If interleaved, select buf_size at a time, in round robin + * fashion, according to number of process. Otherwise, select + * all bytes_count in contiguous. + */ + nbytes_xfer = 0; + + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0]); + } /* end if */ + else { + /* Set base file offset for all I/O patterns and POSIX access */ + posix_file_offset = dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]; + + /* Set base file offset for all I/O patterns and MPI access */ + mpi_file_offset = (MPI_Offset)(dset_offset + bytes_begin[0] * snbytes + bytes_begin[1]); + } /* end else */ + + /* Start "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTART); + + while (nbytes_xfer < bytes_count) { + /* Read */ + /* Calculate offset of read within a dataset/file */ + switch (parms->io_type) { + case POSIXIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)buf_size == POSIXREAD(fd->posixfd, buffer, buf_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + file_offset = posix_file_offset + (off_t)(nbytes_xfer * pio_mpi_nprocs_g); + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)blk_size == POSIXREAD(fd->posixfd, buf_p, blk_size)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / blk_size) % (size_t)snbytes)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute file offset */ + file_offset = + posix_file_offset + + (off_t)(((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = (off_t)snbytes; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute file offset */ + file_offset = posix_file_offset + (off_t)nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + file_offset_advance = 0; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute file offset */ + /* Before simplification */ + /* file_offset=posix_file_offset+(off_t)((nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))*(buf_size/blk_size + *snbytes/blk_size*(blk_size*blk_size))+((nbytes_xfer/(buf_size/blk_size)) + *pio_mpi_nprocs_g)%(snbytes/blk_size*(blk_size*blk_size))); */ + + file_offset = posix_file_offset + + (off_t)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes) + + (((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* file_offset_advance = (off_t)(snbytes/blk_size*(blk_size*blk_size)); */ + file_offset_advance = (off_t)((size_t)snbytes * blk_size); + } /* end else */ + } /* end else */ + + /* Common code for file access */ + + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to read */ + while (nbytes_toxfer > 0) { + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc == 0), "POSIXSEEK"); + + /* check if all bytes are read */ + rc = ((ssize_t)nbytes_xfer_advance == + POSIXREAD(fd->posixfd, buf_p, nbytes_xfer_advance)); + VRFY((rc != 0), "POSIXREAD"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance file offset */ + file_offset += file_offset_advance; + } /* end while */ + + } /* end else */ + break; + + case MPIO: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Independent file access */ + if (!parms->collective) { + /* Contiguous pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size; + + /* Loop over the buffers to read */ + while (nbytes_toxfer > 0) { + /* Skip offset over blocks of other processes */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p, (int)1, mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance location in buffer */ + buf_p += blk_size; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)blk_size; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= blk_size; + } /* end while */ + } /* end else */ + } /* end if */ + /* Collective file access */ + else { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, mpi_offset, buffer, + (int)(buf_size / blk_size), mpi_blk_type, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + (nbytes_xfer * pio_mpi_nprocs_g); + + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, mpi_blk_type, mpi_file_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform collective read */ + mrc = MPI_File_read_at_all(fd->mpifd, 0, buffer, (int)(buf_size / blk_size), + mpi_blk_type, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size; + } /* end else */ + } /* end else */ + } /* end if */ + /* 2D dataspace */ + else { + /* Contiguous storage */ + if (!parms->h5_use_chunks) { + /* Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / blk_size) / (size_t)snbytes) * + (blk_size * (size_t)snbytes)) + + (MPI_Offset)(((size_t)nbytes_xfer / blk_size) % (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_cont_type; + } /* end if */ + /* Interleaved access pattern */ + else { + /* Compute offset in file */ + mpi_offset = + mpi_file_offset + + (MPI_Offset)( + ((((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) / + (size_t)snbytes) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)( + (((size_t)nbytes_xfer / buf_size) * (size_t)pio_mpi_nprocs_g) % + (size_t)snbytes); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = snbytes; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_inter_type; + } /* end else */ + } /* end if */ + /* Chunked storage */ + else { + /*Contiguous access pattern */ + if (!parms->interleaved) { + /* Compute offset in file */ + mpi_offset = mpi_file_offset + nbytes_xfer; + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * buf_size; + + /* Global offset advance after each I/O operation */ + mpi_offset_advance = 0; + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_full_buffer; + } /* end if */ + /*Interleaved access pattern */ + else { + /* Compute offset in file */ + /* Before simplification */ + /* mpi_offset=mpi_file_offset+(nbytes_xfer/(buf_size/blk_size) + *pio_mpi_nprocs_g)/(snbytes/blk_size*(blk_size*blk_size))* + (buf_size/blk_size*snbytes/blk_size*(blk_size*blk_size))+ + ((nbytes_xfer/(buf_size/blk_size))*pio_mpi_nprocs_g)%(snbytes + /blk_size*(blk_size*blk_size)); */ + mpi_offset = mpi_file_offset + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size) * + (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * blk_size)) * + (buf_size * (size_t)snbytes)) + + (MPI_Offset)((((size_t)nbytes_xfer / (buf_size / blk_size)) * + (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * blk_size)); + + /* Number of bytes to be transferred per I/O operation */ + nbytes_xfer_advance = blk_size * blk_size; + + /* Global offset advance after each I/O operation */ + /* mpi_offset_advance = (MPI_Offset)(snbytes/blk_size*(blk_size*blk_size)); */ + mpi_offset_advance = (MPI_Offset)((size_t)snbytes * blk_size); + + /* MPI type to be used for collective access */ + mpi_collective_type = mpi_chunk_inter_type; + } /* end else */ + } /* end else */ + + /* Common code for independent file access */ + if (!parms->collective) { + /* Set the base of user's buffer */ + buf_p = (unsigned char *)buffer; + + /* Set the number of bytes to transfer this time */ + nbytes_toxfer = buf_size * blk_size; + + /* Loop over portions of the buffer to read */ + while (nbytes_toxfer > 0) { + /* Perform independent read */ + mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buf_p, (int)nbytes_xfer_advance, + MPI_BYTE, &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance location in buffer */ + buf_p += nbytes_xfer_advance; + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)nbytes_xfer_advance; + + /* Decrement number of bytes left this time */ + nbytes_toxfer -= nbytes_xfer_advance; + + /* Partially advance global offset in dataset */ + mpi_offset += mpi_offset_advance; + } /* end while */ + } /* end if */ + + /* Common code for collective file access */ + else { + /* Set the file view */ + mrc = MPI_File_set_view(fd->mpifd, mpi_offset, MPI_BYTE, mpi_collective_type, + (char *)"native", h5_io_info_g); + VRFY((mrc == MPI_SUCCESS), "MPIO_VIEW"); + + /* Perform read */ + MPI_File_read_at_all(fd->mpifd, 0, buffer, (int)(buf_size * blk_size), MPI_BYTE, + &mpi_status); + VRFY((mrc == MPI_SUCCESS), "MPIO_READ"); + + /* Advance global offset in dataset */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + } /* end else */ + + } /* end else */ + break; + + case PHDF5: + /* 1D dataspace */ + if (!parms->dim2d) { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = nbytes_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (nbytes_xfer * pio_mpi_nprocs_g); + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Read the buffer in */ + hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dread"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size; + } /* end if */ + /* 2D dataspace */ + else { + /* Set up the file dset space id to move the selection to process */ + if (!parms->interleaved) { + /* Contiguous pattern */ + h5offset[0] = + (hssize_t)(((size_t)nbytes_xfer / ((size_t)snbytes * blk_size)) * blk_size); + h5offset[1] = + (hssize_t)(((size_t)nbytes_xfer % ((size_t)snbytes * blk_size)) / blk_size); + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5offset[0] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) / + ((size_t)snbytes * buf_size)) * + buf_size); + h5offset[1] = (hssize_t)((((size_t)nbytes_xfer * (size_t)pio_mpi_nprocs_g) % + ((size_t)snbytes * buf_size)) / + buf_size); + + } /* end else */ + hrc = H5Soffset_simple(h5dset_space_id, h5offset); + VRFY((hrc >= 0), "H5Soffset_simple"); + + /* Write the buffer out */ + hrc = H5Dread(h5ds_id, ELMT_H5_TYPE, h5mem_space_id, h5dset_space_id, h5dxpl, buffer); + VRFY((hrc >= 0), "H5Dread"); + + /* Increment number of bytes transferred */ + nbytes_xfer += (off_t)buf_size * (off_t)blk_size; + + } /* end else */ + break; + + default: + break; + } /* switch (parms->io_type) */ + + /* Verify raw data, if asked */ + if (parms->verify) { + /* Verify data read */ + unsigned char *ucharptr = (unsigned char *)buffer; + size_t i; + int nerror = 0; + + for (i = 0; i < bsize; ++i) { + if (*ucharptr++ != pio_mpi_rank_g + 1) { + if (++nerror < 20) { + /* report at most 20 errors */ + HDprint_rank(output); + HDfprintf(output, + "read data error, expected (%d), " + "got (%d)\n", + pio_mpi_rank_g + 1, (int)*(ucharptr - 1)); + } /* end if */ + } /* end if */ + } /* end for */ + if (nerror >= 20) { + HDprint_rank(output); + HDfprintf(output, "..."); + HDfprintf(output, "total read data errors=%d\n", nerror); + } /* end if */ + } /* if (parms->verify) */ + + } /* end while */ + + /* Stop "raw data" read timer */ + io_time_set(res->timers, HDF5_RAW_READ_FIXED_DIMS, TSTOP); + + /* Calculate read time */ + + /* Close dataset. Only HDF5 needs to do an explicit close. */ + if (parms->io_type == PHDF5) { + hrc = H5Dclose(h5ds_id); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Close failed\n"); + GOTOERROR(FAIL); + } + + h5ds_id = H5I_INVALID_HID; + } /* end if */ + } /* end for */ + +done: + /* release MPI-I/O objects */ + if (parms->io_type == MPIO) { + /* 1D dataspace */ + if (!parms->dim2d) { + /* Free file type */ + mrc = MPI_Type_free(&mpi_file_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free buffer type */ + mrc = MPI_Type_free(&mpi_blk_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end if */ + /* 2D dataspace */ + else { + /* Free partial buffer type for contiguous access */ + mrc = MPI_Type_free(&mpi_partial_buffer_cont); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free contiguous file type */ + mrc = MPI_Type_free(&mpi_cont_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free partial buffer type for interleaved access */ + mrc = MPI_Type_free(&mpi_partial_buffer_inter); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free interleaved file type */ + mrc = MPI_Type_free(&mpi_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full buffer type */ + mrc = MPI_Type_free(&mpi_full_buffer); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free full chunk type */ + mrc = MPI_Type_free(&mpi_full_chunk); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + + /* Free chunk interleaved file type */ + mrc = MPI_Type_free(&mpi_chunk_inter_type); + VRFY((mrc == MPI_SUCCESS), "MPIO_TYPE_FREE"); + } /* end else */ + } /* end if */ + + /* release HDF5 objects */ + if (h5dset_space_id != -1) { + hrc = H5Sclose(h5dset_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Space Close failed\n"); + ret_code = FAIL; + } + else { + h5dset_space_id = H5I_INVALID_HID; + } + } + + if (h5mem_space_id != -1) { + hrc = H5Sclose(h5mem_space_id); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Memory Space Close failed\n"); + ret_code = FAIL; + } + else { + h5mem_space_id = H5I_INVALID_HID; + } + } + + if (h5dxpl != -1) { + hrc = H5Pclose(h5dxpl); + if (hrc < 0) { + HDfprintf(stderr, "HDF5 Dataset Transfer Property List Close failed\n"); + ret_code = FAIL; + } + else { + h5dxpl = H5I_INVALID_HID; + } + } + + return ret_code; +} + +/* + * Function: do_fopen + * Purpose: Open the specified file. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + */ +static herr_t +do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags) +{ + int ret_code = SUCCESS, mrc; + hid_t acc_tpl = H5I_INVALID_HID; /* file access templates */ + + switch (param->io_type) { + case POSIXIO: + if (flags & (PIO_CREATE | PIO_WRITE)) + fd->posixfd = POSIXCREATE(fname); + else + fd->posixfd = POSIXOPEN(fname, O_RDONLY); + + if (fd->posixfd < 0) { + HDfprintf(stderr, "POSIX File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /* The perils of POSIX I/O in a parallel environment. The problem is: + * + * - Process n opens a file with truncation and then starts + * writing to the file. + * - Process m also opens the file with truncation, but after + * process n has already started to write to the file. Thus, + * all of the stuff process n wrote is now lost. + */ + MPI_Barrier(pio_comm_g); + + break; + + case MPIO: + if (flags & (PIO_CREATE | PIO_WRITE)) { + MPI_File_delete(fname, h5_io_info_g); + mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_CREATE | MPI_MODE_RDWR, h5_io_info_g, + &fd->mpifd); + + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /*since MPI_File_open with MPI_MODE_CREATE does not truncate */ + /*filesize , set size to 0 explicitedly. */ + mrc = MPI_File_set_size(fd->mpifd, (MPI_Offset)0); + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI_File_set_size failed\n"); + GOTOERROR(FAIL); + } + } + else { + mrc = MPI_File_open(pio_comm_g, fname, MPI_MODE_RDONLY, h5_io_info_g, &fd->mpifd); + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File Open failed(%s)\n", fname); + GOTOERROR(FAIL); + } + } + + break; + + case PHDF5: + if ((acc_tpl = H5Pcreate(H5P_FILE_ACCESS)) < 0) { + HDfprintf(stderr, "HDF5 Property List Create failed\n"); + GOTOERROR(FAIL); + } + + /* Set the file driver to the MPI-IO driver */ + if (H5Pset_fapl_mpio(acc_tpl, pio_comm_g, h5_io_info_g) < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } + + /* Set the alignment of objects in HDF5 file */ + if (H5Pset_alignment(acc_tpl, param->h5_thresh, param->h5_align) < 0) { + HDfprintf(stderr, "HDF5 Property List Set failed\n"); + GOTOERROR(FAIL); + } + + /* create the parallel file */ + if (flags & (PIO_CREATE | PIO_WRITE)) + fd->h5fd = H5Fcreate(fname, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl); + else + fd->h5fd = H5Fopen(fname, H5F_ACC_RDONLY, acc_tpl); + if (fd->h5fd < 0) { + HDfprintf(stderr, "HDF5 File Create failed(%s)\n", fname); + GOTOERROR(FAIL); + } + + /* verifying the close of the acc_tpl */ + if (H5Pclose(acc_tpl) < 0) { + HDfprintf(stderr, "HDF5 Property List Close failed\n"); + GOTOERROR(FAIL); + } + + break; + + default: + break; + } + +done: + return ret_code; +} + +/* + * Function: do_fclose + * Purpose: Close the specified file descriptor. + * Return: SUCCESS or FAIL + * Programmer: Albert Cheng, Bill Wendling, 2001/12/13 + * Modifications: + */ +static herr_t +do_fclose(iotype iot, file_descr *fd /*out*/) +{ + herr_t ret_code = SUCCESS, hrc; + int mrc = 0, rc = 0; + + switch (iot) { + case POSIXIO: + rc = POSIXCLOSE(fd->posixfd); + + if (rc != 0) { + HDfprintf(stderr, "POSIX File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->posixfd = -1; + break; + + case MPIO: + mrc = MPI_File_close(&fd->mpifd); + + if (mrc != MPI_SUCCESS) { + HDfprintf(stderr, "MPI File close failed\n"); + GOTOERROR(FAIL); + } + + fd->mpifd = MPI_FILE_NULL; + break; + + case PHDF5: + hrc = H5Fclose(fd->h5fd); + + if (hrc < 0) { + HDfprintf(stderr, "HDF5 File Close failed\n"); + GOTOERROR(FAIL); + } + + fd->h5fd = -1; + break; + + default: + break; + } + +done: + return ret_code; +} + +/* + * Function: do_fclose + * Purpose: Cleanup temporary file unless HDF5_NOCLEANUP is set. + * Only Proc 0 of the PIO communicator will do the cleanup. + * Other processes just return. + * Return: void + * Programmer: Albert Cheng 2001/12/12 + * Modifications: + */ +static void +do_cleanupfile(iotype iot, char *fname) +{ + if (pio_mpi_rank_g != 0) + return; + + if (clean_file_g == -1) + clean_file_g = (getenv("HDF5_NOCLEANUP") == NULL) ? 1 : 0; + + if (clean_file_g) { + switch (iot) { + case POSIXIO: + HDremove(fname); + break; + case MPIO: + case PHDF5: + MPI_File_delete(fname, h5_io_info_g); + break; + default: + break; + } + } +} + +#ifdef TIME_MPI +/* instrument the MPI_File_wrirte_xxx and read_xxx calls to measure + * pure time spent in MPI_File code. + */ +int +MPI_File_read_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_READ, TSTART); + err = PMPI_File_read_at(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_READ, TSTOP); + return err; +} + +int +MPI_File_read_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_READ, TSTART); + err = PMPI_File_read_at_all(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_READ, TSTOP); + return err; +} + +int +MPI_File_write_at(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_WRITE, TSTART); + err = PMPI_File_write_at(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_WRITE, TSTOP); + return err; +} + +int +MPI_File_write_at_all(MPI_File fh, MPI_Offset offset, void *buf, int count, MPI_Datatype datatype, + MPI_Status *status) +{ + int err; + io_time_set(timer_g, HDF5_MPI_WRITE, TSTART); + err = PMPI_File_write_at_all(fh, offset, buf, count, datatype, status); + io_time_set(timer_g, HDF5_MPI_WRITE, TSTOP); + return err; +} + +#endif /* TIME_MPI */ +#endif /* H5_HAVE_PARALLEL */ |