diff options
Diffstat (limited to 'src/H5Fmpio.c')
-rw-r--r-- | src/H5Fmpio.c | 1138 |
1 files changed, 0 insertions, 1138 deletions
diff --git a/src/H5Fmpio.c b/src/H5Fmpio.c deleted file mode 100644 index 88f9877..0000000 --- a/src/H5Fmpio.c +++ /dev/null @@ -1,1138 +0,0 @@ -/* - * Copyright (C) 1998 NCSA - * All rights reserved. - * - * Programmer: - * January 30, 1998 - * - * Purpose: This is the MPI2 I/O subclass of H5Flow. - * - * Problems and limitations: - * - * H5F_mpio_access - * - Since there is no "access" function for MPI-IO files - * we open (i.e., MPI_File_open) the file to see if it exists - * and to infer the access flags. If the file is opened, - * we close it without reading or writing it. - * - It is not possible within MPI-IO to determine whether or not - * the names "file1" and "file2" refer to the same physical file - * (at least not without writing one and reading the other). - * So we do what H5F_core_open() does: return a bogus device - * number and a unique inode number. - * This has the side effect that calling H5Fopen() twice - * with the same name really does open the file twice - * and the two handles don't communicate with each other, - * resulting in trashing the file. It also runs the (very - * small) risk of having two unrelated names be seen as the - * same file. - * - * H5F_mpio_open - * - "unique" key treated same as in H5F_mpio_access - * - * H5F_mpio_read & H5F_mpio_write - * - Eventually these should choose collective or independent i/o - * based on a parameter that is passed down to it from H5Dwrite, - * rather than the access_parms (which are fixed at the open). - * - * H5F_mpio_read - * - One implementation of MPI/MPI-IO causes MPI_Get_count - * to return (incorrectly) a negative count. - * I added code to detect this, and a kludge to pretend - * that the number of bytes read is always equal to the number - * requested. This kluge is activated by #ifdef MPI_KLUGE0202. - * - */ -#include <H5private.h> -#include <H5Eprivate.h> -#include <H5Dprivate.h> -#include <H5MMprivate.h> - -#ifndef HAVE_PARALLEL -/* - * The H5F_mpio_xxxx functions are for parallel I/O only and are - * valid only when HAVE_PARALLEL is #defined. This empty #ifndef - * body is used to allow this source file be included in the serial - * distribution. - * Some compilers/linkers may complain about "empty" object file. - * If that happens, uncomment the following statement to pacify - * them. - */ -/* const hbool_t H5F_mpio_avail = FALSE; */ -#else /* HAVE_PARALLEL */ - -#define PABLO_MASK H5Fmpio_mask -static intn interface_initialize_g = 0; -#define INTERFACE_INIT NULL - -/* Global var to allow elimination of redundant metadata writes - * to be controlled by the value of an environment variable. */ -/* Use the elimination by default unless this is the Intel Red machine */ -#ifndef __PUMAGON__ -hbool_t H5_mpi_1_metawrite_g = TRUE; -#else -hbool_t H5_mpi_1_metawrite_g = FALSE; -#endif - -#define H5F_MPIO_DEV 0xfffe /*pseudo dev for MPI-IO until we fix things */ - /* Make sure this differs from H5F_CORE_DEV */ - -#ifdef H5Fmpio_DEBUG -/* Flags to control debug actions in H5Fmpio. - * Meant to be indexed by characters. - * - * 'c' show result of MPI_Get_count after read - * 'r' show read offset and size - * 't' trace function entry and exit - * 'w' show write offset and size - */ -static int H5F_mpio_Debug[256] = - { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; -#endif - -static htri_t H5F_mpio_access(const char *name, - const H5F_access_t *access_parms, int mode, - H5F_search_t *key/*out*/); -static H5F_low_t *H5F_mpio_open(const char *name, - const H5F_access_t *access_parms, uintn flags, - H5F_search_t *key/*out*/); -static herr_t H5F_mpio_close(H5F_low_t *lf, const H5F_access_t *access_parms); -static herr_t H5F_mpio_read(H5F_low_t *lf, H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, - size_t size, uint8_t *buf/*out*/); -htri_t H5F_mpio_tas_allsame(H5F_low_t *lf, hbool_t newval ); -static herr_t H5F_mpio_write(H5F_low_t *lf, H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, - size_t size, const uint8_t *buf); -static herr_t H5F_mpio_flush(H5F_low_t *lf, const H5F_access_t *access_parms); -static herr_t H5F_MPIOff_to_haddr(MPI_Offset mpi_off, haddr_t *addr_p/*out*/); -static herr_t H5F_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off/*out*/); - -const H5F_low_class_t H5F_LOW_MPIO_g[1] = {{ - H5F_mpio_access, /*access method */ - H5F_mpio_open, /*open method */ - H5F_mpio_close, /*close method */ - - /* rky 980816 - * this is ugly, but removing the const modifier from access_parms - * in the parameter list of the write function in H5F_low_class_t - * would propagate to a lot of functions that don't change that param */ - (int(*)(struct H5F_low_t *lf, const H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, size_t size, - uint8_t *buf)) - H5F_mpio_read, /*read method */ - - /* rky 980816 - * this is ugly, but removing the const modifier from access_parms - * in the parameter list of the write function in H5F_low_class_t - * would propagate to a lot of functions that don't change that param */ - (int(*)(struct H5F_low_t *lf, const H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, size_t size, - const uint8_t *buf)) - H5F_mpio_write, /*write method */ - - H5F_mpio_flush, /*flush method */ - NULL, /*extend method */ - NULL, /*alloc method */ -}}; - -ino_t mpio_inode_num = 0; /* fake "inode" number */ - - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_access - * - * Purpose: Determines if an MPI-IO file can be accessed in a particular - * way. The access modes for a file are the same as those of - * access(2), namely - * - * F_OK: determines if the MPI-IO file exists - * (in fact, we can only determine that the file can be - * opened for reading or writing, or neither) - * - * R_OK: determines if the MPI-IO file is readable - * - * W_OK: determines if the MPI-IO file is writable. - * - * Warning: It is not possible within MPI-IO to determine whether or not - * the names "file1" and "file2" refer to the same physical fileC - * (at least not without writing one and reading the other). - * So we do what H5F_core_open() does: return a bogus device number - * and a unique inode number. - * This has the side effect that calling H5Fopen() twice - * with the same name really does open the file twice - * and the two handles don't communicate with each other, - * resulting in trashing the file. It also runs the (very small) - * risk of having two unrelated names be seen as the same file. - * - * Must call this routine collectively since it collectively - * calls MPI_File_open with the communicator in access_parms. - * - * Return: Success: TRUE or FALSE. If TRUE, then KEY is - * initialized with data that makes this file - * unique (same value as H5F_low_open). - * - * Failure: FAIL, KEY is undefined. - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. - * - * June 9, 1998 Albert Cheng - * Instead of opening the file with COMM_SELF (which results in - * racing condition in routine that calls it), open it with the - * communicator in access_parms. (This assumes this access call - * must be called collectively.) - * - *------------------------------------------------------------------------- - */ -static htri_t -H5F_mpio_access(const char *name, const H5F_access_t *access_parms, int mode, - H5F_search_t *key/*out*/) -{ - htri_t ret_val = FALSE; - MPI_File fh; - int mpierr; - int mpi_mode; - - FUNC_ENTER(H5F_mpio_access, FAIL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_access name=%s mode=0x%x\n", name, mode ); -#endif - assert(access_parms->driver == H5F_LOW_MPIO); - - /* The only way to get this info in MPI-IO is to try to open the file */ - /* (though particular implementations of MPI-IO may allow other ways) */ - switch (mode) { - case F_OK: mpi_mode = MPI_MODE_RDONLY; - /* to see if it exists, first try to open for read */ - break; - case R_OK: mpi_mode = MPI_MODE_RDONLY; - break; - case W_OK: mpi_mode = MPI_MODE_WRONLY; - break; - default: HRETURN_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, - "invalid mode parameter"); - } - - /* (char*) name is okay since MPI_File_open will not change it. */ - mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*) name, - mpi_mode, access_parms->u.mpio.info, &fh ); - if (MPI_SUCCESS == mpierr) { - mpierr = MPI_File_close( &fh ); - if (MPI_SUCCESS != mpierr) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_File_close failed"); - ret_val = TRUE; - } else if (mode == F_OK) { - /* to see if it exists, this time try to open for write */ - mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*)name, - MPI_MODE_WRONLY, access_parms->u.mpio.info, - &fh ); - if (MPI_SUCCESS == mpierr) { - mpierr = MPI_File_close( &fh ); - if (MPI_SUCCESS != mpierr) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_File_close failed"); - ret_val = TRUE; - } - } - - /* if the file exists, provide its (not really) unique key */ - if ((ret_val==TRUE) && key) { - key->dev = H5F_MPIO_DEV; - key->ino = mpio_inode_num++; - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) { - if (key && (ret_val==TRUE)) - fprintf(stdout, - "Leaving H5F_mpio_access ret_val=%d key->dev=0x%x key->ino=%d\n", - ret_val, key->dev, key->ino ); - else - fprintf(stdout, "Leaving H5F_mpio_access ret_val=%d\n", ret_val ); - } -#endif - - FUNC_LEAVE(ret_val); -} - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_open - * - * Purpose: Opens a file with name NAME. The FLAGS are a bit field with - * the possible values defined in H5F_low_open(). - * - * Errors: - * IO CANTOPENFILE MPI_File_open failed. - * IO CANTOPENFILE MPI_File_get_size failed. - * IO CANTOPENFILE MPI_File_set_size failed (for truncate). - * - * Return: Success: Low-level file pointer - * - * Failure: NULL - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. Moved some error checking here from - * elsewhere. - * - * rky, 11 Jun 1998 - * Added H5F_mpio_Debug debug flags controlled by MPI_Info. - * - * rky 980828 Init flag controlling redundant metadata writes to disk. - * - * rky 19981207 Added barrier after MPI_File_set_size to prevent - * race condition: subsequent writes were being truncated, - * causing holes in file. - *------------------------------------------------------------------------- - */ -static H5F_low_t * -H5F_mpio_open(const char *name, const H5F_access_t *access_parms, uintn flags, - H5F_search_t *key/*out*/) -{ - H5F_low_t *lf = NULL; - MPI_File fh; - int mpi_amode; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - int mpierr, msglen; - MPI_Offset size; - - FUNC_ENTER(H5F_mpio_open, NULL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_open name=%s flags=0x%x\n", name, flags ); -#endif - - /* convert HDF5 flags to MPI-IO flags */ - /* some combinations are illegal; let MPI-IO figure it out */ - mpi_amode = (flags&H5F_ACC_RDWR) ? MPI_MODE_RDWR : MPI_MODE_RDONLY; - if (flags&H5F_ACC_CREAT) mpi_amode |= MPI_MODE_CREATE; - if (flags&H5F_ACC_EXCL) mpi_amode |= MPI_MODE_EXCL; - -#ifdef H5Fmpio_DEBUG - { - /* set debug mask */ - /* Should this be done in H5F global initialization instead of here? */ - const char *s = HDgetenv ("H5F_mpio_Debug"); - if (s) { - while (*s){ - H5F_mpio_Debug[(int)*s]++; - s++; - } - } - } - /* Check for debug commands in the info parameter */ - { char debug_str[128]; - int infoerr, flag, i; - if (access_parms->u.mpio.info) { - infoerr = MPI_Info_get( access_parms->u.mpio.info, - H5F_MPIO_DEBUG_KEY, 127, debug_str, &flag ); - if (flag) { - fprintf(stdout, "H5Fmpio debug flags=%s\n", debug_str ); - for (i=0; - debug_str[i]/*end of string*/ && i<128/*just in case*/; - ++i) { - H5F_mpio_Debug[(int)debug_str[i]] = 1; - } - } - } - } -#endif - - mpierr = MPI_File_open(access_parms->u.mpio.comm, (char*)name, mpi_amode, - access_parms->u.mpio.info, &fh); - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, mpierrmsg ); - } - - /* truncate the file, if requested */ - if (flags&H5F_ACC_TRUNC) { - mpierr = MPI_File_set_size( fh, (MPI_Offset)0 ); - if (MPI_SUCCESS != mpierr) { - MPI_File_close( &fh ); - HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, - "MPI_File_set_size failed trying to truncate file" ); - } - /* Don't let any proc return until all have truncated the file. */ - mpierr = MPI_Barrier( access_parms->u.mpio.comm ); - if (MPI_SUCCESS!=mpierr) { - MPI_File_close( &fh ); - HRETURN_ERROR( H5E_IO, H5E_MPI, NULL, "MPI_Barrier failed" ); - } - } - - /* Build the return value */ - if (NULL==(lf = H5MM_calloc(sizeof(H5F_low_t)))) { - HRETURN_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL, - "memory allocation failed"); - } - lf->u.mpio.f = fh; - H5F_mpio_tas_allsame( lf, FALSE ); /* initialize */ - lf->eof = 0; - mpierr = MPI_File_get_size( fh, &size ); - if (MPI_SUCCESS != mpierr) { - MPI_File_close( &(lf->u.mpio.f) ); - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, mpierrmsg ); - } else { - haddr_t new_eof; - if (SUCCEED != H5F_MPIOff_to_haddr( size, &new_eof )) { - MPI_File_close( &(lf->u.mpio.f) ); - HRETURN_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, - "couldn't convert size to haddr_t" ); - } - H5F_low_seteof(lf, new_eof); - } - - /* The unique key */ - if (key) { - key->dev = H5F_MPIO_DEV; - key->ino = mpio_inode_num++; - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) { - if (key) - fprintf(stdout, "Leaving H5F_mpio_open key->dev=0x%x key->ino=%d\n", - key->dev, key->ino ); - else - fprintf(stdout, "Leaving H5F_mpio_open\n" ); - } -#endif - - FUNC_LEAVE(lf); -} - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_close - * - * Purpose: Closes a file. - * - * Errors: - * IO CLOSEERROR Fclose failed. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. - * - *------------------------------------------------------------------------- - */ -static herr_t -H5F_mpio_close(H5F_low_t *lf, const H5F_access_t UNUSED *access_parms) -{ - int mpierr; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - int msglen; - - FUNC_ENTER(H5F_mpio_close, FAIL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_close\n" ); -#endif - - mpierr = MPI_File_close( &(lf->u.mpio.f) ); - /* MPI_File_close sets lf->u.mpio.f to MPI_FILE_NULL */ - - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_CLOSEERROR, FAIL, mpierrmsg ); - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Leaving H5F_mpio_close\n" ); -#endif - FUNC_LEAVE(SUCCEED); -} - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_read - * - * Purpose: Depending on a field in access params, either: - * - Writes SIZE bytes from the beginning of BUF into file LF - * at file address ADDR. - * - Reads SIZE bytes beginning at address ADDR in file LF - * and places them in buffer BUF. - * - Uses the (potentially complex) file and buffer types - * to effect the transfer. - * This can allow MPI to coalesce requests from - * different processes (collective or independent). - * - * Reading past the end of the MPI file - * returns zeros instead of failing. - * - * Errors: - * IO READERROR MPI_File_read_at failed. - * IO READERROR MPI_Get_count failed - * - * Return: Non-negative on success/Negative on failure - * (use_types and old_use_types in the access params are altered) - * - * Programmer: rky 980130 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. - * - * rky, 10 Apr 1998 - * Call independent or collective MPI read, based on ACCESS_PARMS. - * - * Albert Cheng, June 1, 1998 - * Added xfer_mode to control independent or collective MPI read. - * - * rky 980816 - * Use btype, ftype, and disp from access parms. - * The guts of H5F_mpio_read and H5F_mpio_write - * should be replaced by a single dual-purpose routine. - * - * Robb Matzke, 1999-04-21 - * Changed xfer_mode to xfer_parms for all H5F_*_read() callbacks. - * - * Robb Matzke, 1999-07-28 - * The ADDR argument is passed by value. - *------------------------------------------------------------------------- - */ -static herr_t -H5F_mpio_read(H5F_low_t *lf, H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, size_t size, - uint8_t *buf/*out*/) -{ - MPI_Offset mpi_off, mpi_disp; - MPI_Status mpi_stat; - MPI_Datatype buf_type, file_type; - int mpierr, msglen, size_i, bytes_read, n; - int use_types_this_time, used_types_last_time; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - - FUNC_ENTER(H5F_mpio_read, FAIL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_read\n" ); -#endif - - /* some numeric conversions */ - if (SUCCEED != H5F_haddr_to_MPIOff(addr, &mpi_off)) { - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert addr to MPIOffset" ); - } - size_i = (int)size; - if ((size_t)size_i != size) { /* check type conversion */ - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert size to int" ); - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'r']) - HDfprintf(stdout, "in H5F_mpio_read mpi_off=%Hd size_i=%d\n", - (hssize_t)mpi_off, size_i ); -#endif - - /* Set up for a fancy xfer using complex types, or single byte block. - * We wouldn't need to rely on the use_types field - * if MPI semantics allowed us to test that btype=ftype=MPI_BYTE - * (or even MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention). - */ - use_types_this_time = access_parms->u.mpio.use_types; - if (use_types_this_time) { - /* prepare for a full-blown xfer using btype, ftype, and disp */ - buf_type = access_parms->u.mpio.btype; - file_type = access_parms->u.mpio.ftype; - if (SUCCEED != - H5F_haddr_to_MPIOff(access_parms->u.mpio.disp, &mpi_disp)) { - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert addr to MPIOffset" ); - } - } else { - /* Prepare for a simple xfer of a contiguous block of bytes. - * The btype, ftype, and disp fields are not used. */ - buf_type = MPI_BYTE; - file_type = MPI_BYTE; - mpi_disp = 0; /* mpi_off is sufficient */ - } - - /* Don't bother to reset the view if we're not using the types this time, - * and did we didn't use them last time either. */ - used_types_last_time = access_parms->u.mpio.old_use_types; - if (used_types_last_time /* change to new ftype or MPI_BYTE */ - || use_types_this_time) /* almost certainly a different ftype */ { - mpierr = MPI_File_set_view( lf->u.mpio.f, mpi_disp, - MPI_BYTE, file_type, - "native", access_parms->u.mpio.info ); - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); - } - } - /* We always set the use_types flag to 0 because the - * default is not to use types next time, - * unless someone explicitly requests it by setting this flag to !=0. */ - access_parms->u.mpio.old_use_types = use_types_this_time; - access_parms->u.mpio.use_types = 0; - - /* Read the data. */ - switch (xfer_parms->xfer_mode){ - case H5D_XFER_INDEPENDENT: - case H5D_XFER_DFLT: - mpierr = MPI_File_read_at ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, buf_type, &mpi_stat ); - break; - - case H5D_XFER_COLLECTIVE: -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "%s: using MPIO collective mode\n", FUNC); -#endif - mpierr = MPI_File_read_at_all ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, buf_type, &mpi_stat ); - break; - - default: - HRETURN_ERROR(H5E_IO, H5E_BADVALUE, FAIL, "invalid file access mode"); - } - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, mpierrmsg ); - } - - /* How many bytes were actually read? */ - mpierr = MPI_Get_count( &mpi_stat, MPI_BYTE, &bytes_read ); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'c']) - fprintf(stdout, - "In H5F_mpio_read after Get_count size_i=%d bytes_read=%d\n", - size_i, bytes_read ); -#endif - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); - } - -#define MPI_KLUGE0202 -#ifdef MPI_KLUGE0202 - /* KLUGE rky 980202 MPI_Get_count incorrectly returns negative count; - fake a complete read */ - bytes_read = size_i; /* KLUGE rky 980202 */ -#endif - - if ((bytes_read<0) || (bytes_read > size_i)) { - HRETURN_ERROR(H5E_IO, H5E_READERROR, FAIL, - "MPI_Get_count returned invalid count" ); - } - - /* This gives us zeroes beyond end of physical MPI file. - * What about reading past logical end of HDF5 file??? */ - if ((n=(size_i-bytes_read)) > 0) { - if (use_types_this_time) { - /* INCOMPLETE rky 980918 Not implemented yet. What to do??? */ - HRETURN_ERROR(H5E_IO, H5E_UNSUPPORTED, FAIL, - "haven't implemented reading zeroes beyond end of file" ); - } else { - HDmemset( buf+bytes_read, 0, (size_t)n ); - } - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Leaving H5F_mpio_read\n" ); -#endif - FUNC_LEAVE(SUCCEED); -} /* H5F_mpio_read */ - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_tas_allsame - * - * Purpose: Test and set the allsame parameter. - * - * Errors: - * - * Return: Success: the old value of the allsame flag - * - * Failure: assert fails if access_parms is NULL. - * - * Programmer: rky 980828 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -htri_t -H5F_mpio_tas_allsame(H5F_low_t *lf, hbool_t newval ) -{ - hbool_t oldval; - - FUNC_ENTER(H5F_mpio_tas_allsame, FALSE); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_tas_allsame, newval=%d\n", newval ); -#endif - - assert(lf); - oldval = lf->u.mpio.allsame; - lf->u.mpio.allsame = newval; - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Leaving H5F_mpio_tas_allsame, oldval=%d\n", oldval ); -#endif - FUNC_LEAVE(oldval); -} - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_write - * - * Purpose: Depending on a field in access params, either: - * - Writes SIZE bytes from the beginning of BUF into file LF - * at file address ADDR. - * - Uses the (potentially complex) file and buffer types - * to effect the transfer. - * This can allow MPI to coalesce requests from - * different processes (collective or independent). - * - * rky 980828 - * If the allsame flag is set, we assume that all the procs - * in the relevant MPI communicator will write identical data - * at identical offsets in the file, so only proc 0 will write, - * and all other procs will wait for p0 to finish. - * This is useful for writing metadata, for example. - * Note that we don't _check_ that the data is identical. - * ALso, the mechanism we use to eliminate the redundant writes - * is by requiring a call to H5F_mpio_tas_allsame before the write, - * which is rather klugey. - * Would it be better to pass a parameter to low-level writes - * like H5F_block_write and H5F_low_write, instead? Or...??? - * Also, when I created this mechanism I wanted to minimize - * the difference in behavior between the old way of doing things - * (i.e., all procs write) and the new way, so the writes are - * eliminated at the very lowest level, here in H5F_mpio_write. - * It may be better to rethink that, and short-circuit the writes - * at a higher level (e.g., at the points in the code where - * H5F_mpio_tas_allsame is called). - * - * Errors: - * IO WRITEERROR MPI_File_write_at failed. - * - * Return: Non-negative on success/Negative on failure - * (use_types and old_use_types in the access params are altered) - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. - * - * rky, 10 Apr 1998 - * Call independent or collective MPI write, based on ACCESS_PARMS. - * - * rky, 24 April - * Removed redundant write from H5F_Mpio_write. - * - * Albert Cheng, June 1, 1998 - * Added xfer_mode to control independent or collective MPI write. - * - * rky 980816 - * Use btype, ftype, and disp from access parms. - * The guts of H5F_mpio_read and H5F_mpio_write - * should be replaced by a single dual-purpose routine. - * - * rky, 980828 - * Added allsame parameter to make all but proc 0 skip the actual write. - * - * Robb Matzke, 1999-04-21 - * Changed XFER_MODE to XFER_PARMS for all H5F_*_write() callbacks. - * - * Robb Matzke, 1999-07-28 - * The ADDR argument is passed by value. - *------------------------------------------------------------------------- - */ -static herr_t -H5F_mpio_write(H5F_low_t *lf, H5F_access_t *access_parms, - const H5F_xfer_t *xfer_parms, haddr_t addr, size_t size, - const uint8_t *buf) -{ - MPI_Offset mpi_off, mpi_disp; - MPI_Status mpi_stat; - MPI_Datatype buf_type, file_type; - int mpierr, msglen, size_i, bytes_written, mpi_rank; - int use_types_this_time, used_types_last_time; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - hbool_t allsame; - - FUNC_ENTER(H5F_mpio_write, FAIL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_write\n" ); -#endif - - /* some numeric conversions */ - if (SUCCEED != H5F_haddr_to_MPIOff(addr, &mpi_off)) { - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert addr to MPIOffset" ); - } - if (SUCCEED!=H5F_haddr_to_MPIOff(access_parms->u.mpio.disp, &mpi_disp)) { - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert addr to MPIOffset" ); - } - size_i = (int)size; - if ((size_t)size_i != size) { /* check type conversion */ - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert size to int" ); - } - -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'w']) - HDfprintf(stdout, "in H5F_mpio_write mpi_off=%Hd size_i=%d\n", - (hssize_t)mpi_off, size_i ); -#endif - - /* Only p0 will do the actual write if all procs in comm write same data */ - allsame = H5F_mpio_tas_allsame( lf, FALSE ); - if (allsame && H5_mpi_1_metawrite_g) { - mpierr = MPI_Comm_rank( access_parms->u.mpio.comm, &mpi_rank ); - if (mpierr != MPI_SUCCESS) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed" ); - if (mpi_rank != 0) { -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'w']) { - fprintf(stdout, " in H5F_mpio_write (write omitted)\n" ); - } -#endif - goto done; /* skip the actual write */ - } - } - - /* Set up for a fancy xfer using complex types, or single byte block. - * We wouldn't need to rely on the use_types field - * if MPI semantics allowed us to test that btype=ftype=MPI_BYTE - * (or even MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention). - */ - use_types_this_time = access_parms->u.mpio.use_types; - if (use_types_this_time) { - /* prepare for a full-blown xfer using btype, ftype, and disp */ - buf_type = access_parms->u.mpio.btype; - file_type = access_parms->u.mpio.ftype; - if (SUCCEED != - H5F_haddr_to_MPIOff(access_parms->u.mpio.disp, &mpi_disp)) { - HRETURN_ERROR(H5E_IO, H5E_BADTYPE, FAIL, - "couldn't convert addr to MPIOffset" ); - } - } else { - /* Prepare for a simple xfer of a contiguous block of bytes. - * The btype, ftype, and disp fields are not used. */ - buf_type = MPI_BYTE; - file_type = MPI_BYTE; - mpi_disp = 0; /* mpi_off is sufficient */ - } - - /* Don't bother to reset the view if we're not using the types this time, - * and did we didn't use them last time either. */ - used_types_last_time = access_parms->u.mpio.old_use_types; - if (used_types_last_time /* change to new ftype or MPI_BYTE */ - || use_types_this_time) /* almost certainly a different ftype */ { - mpierr = MPI_File_set_view( lf->u.mpio.f, mpi_disp, - MPI_BYTE, file_type, - "native", access_parms->u.mpio.info ); - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); - } - } - /* We always set the use_types flag to 0 because the - * default is not to use types next time, - * unless someone explicitly requests it by setting this flag to !=0. */ - access_parms->u.mpio.old_use_types = use_types_this_time; - access_parms->u.mpio.use_types = 0; - - /* Write the data. */ - switch (xfer_parms->xfer_mode){ - case H5D_XFER_INDEPENDENT: - case H5D_XFER_DFLT: - mpierr = MPI_File_write_at ( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, buf_type, &mpi_stat ); - break; - - case H5D_XFER_COLLECTIVE: -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "%s: using MPIO collective mode\n", FUNC); -#endif - mpierr = MPI_File_write_at_all( lf->u.mpio.f, mpi_off, (void*) buf, - size_i, buf_type, &mpi_stat ); - break; - - default: - HRETURN_ERROR(H5E_IO, H5E_BADVALUE, FAIL, "invalid file access mode"); - } - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, mpierrmsg ); - } - - /* How many bytes were actually written? */ - mpierr = MPI_Get_count( &mpi_stat, MPI_BYTE, &bytes_written ); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'c']) - fprintf(stdout, - "In H5F_mpio_write after Get_count size_i=%d bytes_written=%d\n", - size_i, bytes_written ); -#endif - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, mpierrmsg ); - } - -#define MPI_KLUGE0202 -#ifdef MPI_KLUGE0202 - /* KLUGE rky 980202 MPI_Get_count incorrectly returns negative count; - fake a complete write */ - bytes_written = size_i; /* KLUGE rky 980202 */ -#endif - - if ((bytes_written<0) || (bytes_written > size_i)) { - HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, - "MPI_Get_count returned invalid count" ); - } - - done: -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Leaving H5F_mpio_write\n" ); -#endif - FUNC_LEAVE(SUCCEED); -} /* H5F_mpio_write */ - -/*------------------------------------------------------------------------- - * Function: H5F_mpio_flush - * - * Purpose: Makes sure that all data is on disk. - * - * Errors: - * IO WRITEERROR MPI_File_sync failed. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * - * Robb Matzke, 18 Feb 1998 - * Added the ACCESS_PARMS argument. - * - *------------------------------------------------------------------------- - */ -static herr_t -H5F_mpio_flush(H5F_low_t *lf, const H5F_access_t UNUSED *access_parms) -{ - int mpierr; - char mpierrmsg[MPI_MAX_ERROR_STRING]; - int msglen; - - FUNC_ENTER(H5F_mpio_flush, FAIL); -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Entering H5F_mpio_flush\n" ); -#endif - - mpierr = MPI_File_sync( lf->u.mpio.f ); - if (MPI_SUCCESS != mpierr) { - MPI_Error_string( mpierr, mpierrmsg, &msglen ); - HRETURN_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, mpierrmsg ); - } -#ifdef H5Fmpio_DEBUG - if (H5F_mpio_Debug[(int)'t']) - fprintf(stdout, "Leaving H5F_mpio_flush\n" ); -#endif - FUNC_LEAVE(SUCCEED); -} - -/*------------------------------------------------------------------------- - * Function: H5F_MPIOff_to_haddr - * - * Purpose: Convert an MPI_Offset value to haddr_t. - * - * Problems and limitations: - * - * Return: Non-negative on success (the haddr_t contains the converted - * value). Negative on failure (the haddr_t is undefined). - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * Robb Matzke, 1999-04-23 - * An error is reported for address overflows. The ADDR output - * argument is optional. - * - *------------------------------------------------------------------------- - */ -static herr_t -H5F_MPIOff_to_haddr(MPI_Offset mpi_off, haddr_t *addr_p/*out*/) -{ - FUNC_ENTER(H5F_MPIOff_to_haddr, FAIL); - - if (addr_p) *addr_p = (uint64_t) mpi_off; - if (mpi_off != (MPI_Offset)(uint64_t)mpi_off) { - HRETURN_ERROR(H5E_IO, H5E_OVERFLOW, FAIL, "bad MPI address"); - } - - FUNC_LEAVE(SUCCEED); -} - - -/*------------------------------------------------------------------------- - * Function: H5F_haddr_to_MPIOff - * - * Purpose: Convert an haddr_t value to MPI_Offset. - * - * Problems and limitations: - * - * Return: Non-negative on success (the MPIOffset contains the converted - * value). Negative on failure (the MPIOffset is undefined). - * - * Programmer: - * January 30, 1998 - * - * Modifications: - * Robb Matzke, 1999-04-23 - * An error is reported for address overflows. The ADDR output - * argument is optional. - * - * Robb Matzke, 1999-07-28 - * The ADDR argument is passed by value. - *------------------------------------------------------------------------- - */ -static herr_t -H5F_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off/*out*/) -{ - FUNC_ENTER(H5F_haddr_to_MPIOff, FAIL); - - if (mpi_off) *mpi_off = (MPI_Offset)addr; - if (addr != (uint64_t)(MPI_Offset)(addr)) { - HRETURN_ERROR(H5E_IO, H5E_OVERFLOW, FAIL, - "hdf5 address overflows MPI address"); - } - - FUNC_LEAVE(SUCCEED); -} - - -/*------------------------------------------------------------------------- - * Function: H5PC_Wait_for_left_neighbor - * - * Purpose: Blocks until (empty) msg is received - * from immediately lower-rank neighbor. - * In conjunction with Signal_right_neighbor, - * useful for enforcing 1-process-at-at-time access - * to critical regions to avoid race conditions - * (though it is overkill to require that the processes - * be allowed to proceed strictly in order of their rank). - * - * NOTE: This routine doesn't read or write any file, - * just performs interprocess coordination. - * It really should reside in a separate package of such routines. - * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: rky - * 19981207 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -herr_t -H5PC_Wait_for_left_neighbor( MPI_Comm comm ) -{ - char msgbuf[1]; - int myid, mpi_err; - MPI_Status rcvstat; - - FUNC_ENTER (H5PC_Wait_for_left_neighbor, FAIL); - - mpi_err = MPI_Comm_rank( comm, &myid ); - if (MPI_SUCCESS!=mpi_err) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed"); - /* p0 has no left neighbor; all other procs wait for msg */ - if (myid != 0) { - mpi_err = MPI_Recv( &msgbuf, 1, MPI_CHAR, myid-1, MPI_ANY_TAG, comm, - &rcvstat ); - if (MPI_SUCCESS!=mpi_err) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Recv failed"); - } - FUNC_LEAVE (SUCCEED); -} /* H5PC_Wait_for_left_neighbor */ - -/*------------------------------------------------------------------------- - * Function: H5PC_Signal_right_neighbor - * - * Purpose: Blocks until (empty) msg is received - * from immediately lower-rank neighbor. - * In conjunction with Wait_for_left_neighbor, - * useful for enforcing 1-process-at-at-time access - * to critical regions to avoid race conditions - * (though it is overkill to require that the processes - * be allowed to proceed strictly in order of their rank). - * - * NOTE: This routine doesn't read or write any file, - * just performs interprocess coordination. - * It really should reside in a separate package of such routines. - * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: rky - * 19981207 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -herr_t -H5PC_Signal_right_neighbor( MPI_Comm comm ) -{ - char msgbuf[1]; - int myid, numprocs, mpi_err; - - FUNC_ENTER (H5PC_Signal_right_neighbor, FAIL); - - mpi_err = MPI_Comm_size( comm, &numprocs ); - if (MPI_SUCCESS!=mpi_err) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed"); - mpi_err = MPI_Comm_rank( comm, &myid ); - if (MPI_SUCCESS!=mpi_err) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Comm_rank failed"); - if (myid != (numprocs-1)) { - mpi_err = MPI_Send( &msgbuf, 0/*empty msg*/, MPI_CHAR, myid+1, 0, comm); - if (MPI_SUCCESS!=mpi_err) - HRETURN_ERROR(H5E_IO, H5E_MPI, FAIL, "MPI_Send failed"); - } - FUNC_LEAVE (SUCCEED); -} /* H5PC_Signal_right_neighbor */ - -#endif /* HAVE_PARALLEL */ |