From bfe92300fb50413b363835fdc4a4e6a8bd5ff8ee Mon Sep 17 00:00:00 2001 From: Robb Matzke Date: Tue, 3 Dec 2002 19:43:24 -0500 Subject: [svn-r6148] ./hdf5-devel/src/H5FDmpiposix.c Purpose: Feature; Optimization Description: Clients pass `-1' or make their own #define for HDF5 functions that take an optional object ID. Blue's GPFS is slow for typical SAF restart dumps. Solution: Added a #define for H5I_INVALID_HID Added GPFS-specific code to H5FDmpiposix.c that tells mmfsd to forego byte range token prefetching. This code can be compiled into the library by defining USE_GPFS_HINTS. The plan is to either generalize this so it's detected during configure and turned on/off at runtime, or to move it up into DSL/SAF with the new HDF5 functions that return the low-level file handle. Platforms tested: SuSE Linux (arborea), gcc and mpich-1.2.4 SunOS (baldric), gcc --- src/H5FDmpiposix.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 4b5a26c..e50afb2 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -41,6 +41,14 @@ #include "H5MMprivate.h" /*memory allocation */ #include "H5Pprivate.h" /*property lists */ +/* Features: + * USE_GPFS_HINTS -- issue gpfs_fcntl() calls to hopefully improve + * performance when accessing files on a GPFS + * file system. + * + * REPORT_IO -- if set then report all POSIX file calls to stderr. + * + */ #ifdef USE_GPFS_HINTS # include <gpfs_fcntl.h> #endif @@ -632,12 +640,14 @@ H5FD_mpiposix_open(const char *name, unsigned flags, hid_t fapl_id, HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code); #ifdef USE_GPFS_HINTS - /* Prevent GPFS from prefetching byte range (BR) tokens */ { + /* Free all byte range tokens. This is a good thing to do if raw data is aligned on 256kB boundaries (a GPFS page is + * 256kB). Care should be taken that there aren't too many sub-page writes, or the mmfsd may become overwhelmed. 
This + * should probably eventually be passed down here as a property. The gpfs_fcntl() will most likely fail if `fd' isn't + * on a GPFS file system. */ struct { gpfsFcntlHeader_t hdr; gpfsFreeRange_t fr; - gpfsMultipleAccessRange_t mar; } hint; memset(&hint, 0, sizeof hint); hint.hdr.totalLength = sizeof hint; @@ -646,15 +656,8 @@ H5FD_mpiposix_open(const char *name, unsigned flags, hid_t fapl_id, hint.fr.structType = GPFS_FREE_RANGE; hint.fr.start = 0; hint.fr.length = 0; - hint.mar.structLen = sizeof hint.mar; - hint.mar.structType = GPFS_MULTIPLE_ACCESS_RANGE; - hint.mar.accRangeCnt = 1; - hint.mar.accRangeArray[0].blockNumber = 1 + mpi_rank; - hint.mar.accRangeArray[0].start = 0; - hint.mar.accRangeArray[0].length = sb.st_blksize; - hint.mar.accRangeArray[0].isWrite = true; - if (gpfs_fcntl(f->fd, &hint)<0) + if (gpfs_fcntl(fd, &hint)<0) HGOTO_ERROR(H5E_FILE, H5E_FCNTL, NULL, "failed to send hints to GPFS"); if (0==mpi_rank) @@ -666,6 +669,10 @@ H5FD_mpiposix_open(const char *name, unsigned flags, hid_t fapl_id, if (NULL==(file=H5MM_calloc(sizeof(H5FD_mpiposix_t)))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed"); +#ifdef REPORT_IO + fprintf(stderr, "open: rank=%d name=%s file=0x%08lx\n", mpi_rank, name, (unsigned long)file); +#endif + /* Set the general file information */ file->fd = fd; file->eof = sb.st_size; @@ -1030,6 +1037,15 @@ H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id, if (addr+size>file->eoa) HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow"); +#ifdef REPORT_IO + { + int commrank; + MPI_Comm_rank(MPI_COMM_WORLD, &commrank); + fprintf(stderr, "read: rank=%d file=0x%08lx type=%d, addr=%lu size=%lu\n", + commrank, (unsigned long)file, (int)type, (unsigned long)addr, (unsigned long)size); + } +#endif + /* Seek to the correct location */ if ((addr!=file->pos || OP_READ!=file->op) && file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0) @@ -1150,6 +1166,38 @@ H5FD_mpiposix_write(H5FD_t 
*_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_DONE(SUCCEED) /* skip the actual write */ } /* end if */ +#ifdef REPORT_IO + { + int commrank; + MPI_Comm_rank(MPI_COMM_WORLD, &commrank); + fprintf(stderr, "write: rank=%d file=0x%08lx type=%d, addr=%lu size=%lu %s\n", + commrank, (unsigned long)file, (int)type, (unsigned long)addr, (unsigned long)size, + 0==file->naccess?"(FIRST ACCESS)":""); + } +#endif + + if (0==file->naccess++) { + /* First write access to this file */ +#ifdef USE_GPFS_HINTS + struct { + gpfsFcntlHeader_t hdr; + gpfsMultipleAccessRange_t mar; + } hint; + memset(&hint, 0, sizeof hint); + hint.hdr.totalLength = sizeof hint; + hint.hdr.fcntlVersion = GPFS_FCNTL_CURRENT_VERSION; + hint.mar.structLen = sizeof hint.mar; + hint.mar.structType = GPFS_MULTIPLE_ACCESS_RANGE; + hint.mar.accRangeCnt = 1; + hint.mar.accRangeArray[0].blockNumber = addr / file->blksize; + hint.mar.accRangeArray[0].start = addr % file->blksize; + hint.mar.accRangeArray[0].length = MIN(file->blksize-hint.mar.accRangeArray[0].start, size); + hint.mar.accRangeArray[0].isWrite = 1; + if (gpfs_fcntl(file->fd, &hint)<0) + HGOTO_ERROR(H5E_FILE, H5E_FCNTL, NULL, "failed to send hints to GPFS"); +#endif + } + /* Seek to the correct location */ if ((addr!=file->pos || OP_WRITE!=file->op) && file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0) -- cgit v0.12