/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Copyright by the Board of Trustees of the University of Illinois. * * All rights reserved. * * * * This file is part of HDF5. The full HDF5 copyright notice, including * * terms governing use, modification, and redistribution, is contained in * * the files COPYING and Copyright.html. COPYING can be found at the root * * of the source code distribution tree; Copyright.html can be found at the * * root level of an installed copy of the electronic HDF5 document set and * * is linked from the top-level documents page. It can also be found at * * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have * * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ #include "H5private.h" /* Library functions */ #include "H5ACprivate.h" /* Metadata cache */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* Files */ #include "H5FDprivate.h" /* File driver */ #include "H5FDfphdf5.h" /* Flexible PHDF5 I/O file driver */ #include "H5Iprivate.h" /* Object IDs */ #include "H5MMprivate.h" /* Memory allocation */ #include "H5Pprivate.h" /* Property lists */ #ifdef H5_HAVE_FPHDF5 #include "H5FPprivate.h" /* Flexible PHDF5 */ /* * The driver identification number, initialized at runtime if * H5_HAVE_FPHDF5 is defined. This allows applications to still have * the H5FD_FPHDF5 "constants" in their source code (it also makes this * file strictly ANSI compliant when H5_HAVE_FPHDF5 isn't defined) */ static hid_t H5FD_FPHDF5_g = 0; /* * The description of a file belonging to this driver. * * The EOF value is only used just after the file is opened in order for * the library to determine whether the file is empty, truncated, or * okay. The MPIO driver doesn't bother to keep it updated since it's an * expensive operation. 
*/
/*
 * NOTE(review): this struct is tagged with an "H5FP_" prefix although the
 * other driver-local symbols declared here use "H5FD_fphdf5_" -- presumably
 * an oversight; confirm before renaming, since the callbacks cast to it.
 */
typedef struct H5FP_fphdf5_t {
    H5FD_t      pub;            /*Public stuff, must be first (ick!) -- the callbacks cast an H5FD_t* straight to this type */
    MPI_File    f;              /*MPIO file handle                      */
    MPI_Comm    comm;           /*Communicator                          */
    MPI_Info    info;           /*File information                      */
    int         mpi_rank;       /*This process's rank                   */
    int         mpi_size;       /*Total number of processes             */
    int         mpi_round;      /*Current round robin process (for metadata I/O) */
    haddr_t     eof;            /*End-of-file marker                    */
    haddr_t     eoa;            /*End-of-address marker                 */
    haddr_t     last_eoa;       /*Last known end-of-address marker      */
} H5FP_fphdf5_t;

/*
 * Prototypes
 */
/* Conversion helpers between HDF5 file addresses and MPI file offsets */
static haddr_t H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off);
static herr_t H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off);

/*
 * Callbacks
 */
/* Every function below is referenced from the H5FD_fphdf5_g class table */
static void *H5FD_fphdf5_fapl_get(H5FD_t *_file);
static H5FD_t *H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr);
static herr_t H5FD_fphdf5_close(H5FD_t *_file);
static herr_t H5FD_fphdf5_query(const H5FD_t *_f1, unsigned long *flags);
static haddr_t H5FD_fphdf5_get_eoa(H5FD_t *_file);
static herr_t H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr);
static haddr_t H5FD_fphdf5_get_eof(H5FD_t *_file);
static herr_t H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t fapl, void **file_handle);
static herr_t H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, void *buf);
static herr_t H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf);
static herr_t H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing);

/*
 * FPHDF5-specific file access properties
 */
/* This is the record stored by H5Pset_fapl_fphdf5() and read back by H5Pget_fapl_fphdf5() */
typedef struct H5FD_fphdf5_fapl_t {
    MPI_Comm    comm;           /*communicator          */
    MPI_Info    info;           /*file information      */
} H5FD_fphdf5_fapl_t;

/*
 * The FPHDF5 file driver information
 */
/*
 * The initializer is positional: the trailing comment on each entry names
 * the H5FD_class_t slot it fills, so entries must not be reordered. A NULL
 * (or 0) entry means this driver supplies no callback/size for that slot.
 */
static const H5FD_class_t H5FD_fphdf5_g = {
    "fphdf5",                       /*name          */
    HADDR_MAX,                      /*maxaddr       */
    H5F_CLOSE_SEMI,                 /*fc_degree     */
    NULL,                           /*sb_size       */
    NULL,                           /*sb_encode     */
    NULL,                           /*sb_decode     */
    sizeof(H5FD_fphdf5_fapl_t),     /*fapl_size     */
    H5FD_fphdf5_fapl_get,           /*fapl_get      */
    NULL,                           /*fapl_copy     */
    NULL,                           /*fapl_free     */
    0,                              /*dxpl_size     */
    NULL,                           /*dxpl_copy     */
    NULL,                           /*dxpl_free     */
    H5FD_fphdf5_open,               /*open          */
    H5FD_fphdf5_close,              /*close         */
    NULL,                           /*cmp           */
    H5FD_fphdf5_query,              /*query         */
    NULL,                           /*alloc         */
    NULL,                           /*free          */
    H5FD_fphdf5_get_eoa,            /*get_eoa       */
    H5FD_fphdf5_set_eoa,            /*set_eoa       */
    H5FD_fphdf5_get_eof,            /*get_eof       */
    H5FD_fphdf5_get_handle,         /*get_handle    */
    H5FD_fphdf5_read,               /*read          */
    H5FD_fphdf5_write,              /*write         */
    H5FD_fphdf5_flush,              /*flush         */
    H5FD_FLMAP_SINGLE,              /*fl_map        */
};

/*
 * Global var to allow elimination of redundant metadata writes to be
 * controlled by the value of an environment variable.
 *
 * Use the elimination by default unless this is the Intel Red machine
 */
/* The default is chosen at compile time by whether __PUMAGON__ is defined */
#ifndef __PUMAGON__
hbool_t H5_fphdf5_1_metawrite_g = TRUE;
#else
hbool_t H5_fphdf5_1_metawrite_g = FALSE;
#endif

/* Interface initialization */
#define PABLO_MASK      H5FD_fphdf5_mask
#define INTERFACE_INIT  H5FD_fphdf5_init
static int interface_initialize_g = 0;

/* ======== Temporary, Local data transfer properties ======== */
/*
 * Each property below is described by a NAME string and a SIZE in bytes;
 * H5FD_fphdf5_setup() inserts them into a dataset transfer property list
 * and H5FD_fphdf5_teardown() removes them again.
 */

/*
 * Definitions for memory MPI type property
 */
#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME      "H5FD_fphdf5_mem_mpi_type"
#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE      sizeof(MPI_Datatype)

/*
 * Definitions for file MPI type property
 */
#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME     "H5FD_fphdf5_file_mpi_type"
#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE     sizeof(MPI_Datatype)

/*
 * Definitions for whether to use MPI types property
 */
#define H5FD_FPHDF5_XFER_USE_VIEW_NAME          "H5FD_fphdf5_use_view"
#define H5FD_FPHDF5_XFER_USE_VIEW_SIZE          sizeof(unsigned)

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_init
 * Purpose:     Initialize this driver by registering the driver with the
 *              library.
 * Return:      Success:    The driver ID for the FPHDF5 driver.
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
hid_t
H5FD_fphdf5_init(void)
{
    hid_t ret_value;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_init, FAIL);

    /*
     * Register the driver class only while the cached ID does not name a
     * VFL driver; otherwise the ID obtained earlier is handed back as is.
     */
    if (H5I_VFL != H5Iget_type(H5FD_FPHDF5_g)) {
        H5FD_FPHDF5_g = H5FDregister(&H5FD_fphdf5_g);
    }

    /* The cached (possibly just-registered) ID is the result */
    ret_value = H5FD_FPHDF5_g;

done:
    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5Pset_fapl_fphdf5
 * Purpose:     Store the user supplied MPIO communicator COMM and INFO
 *              in the file access property list FAPL_ID which can then
 *              be used to create and/or open the file. This function is
 *              available only in the parallel HDF5 library and is not
 *              collective.
 *
 *              COMM is the MPI communicator to be used for file open as
 *              defined in MPI_FILE_OPEN of MPI-2. This function does not
 *              make a duplicated communicator. Any modification to COMM
 *              after this function call returns may have an indeterminate
 *              effect on the access property list. Users should not
 *              modify the communicator while it is defined in a property
 *              list.
 *
 *              INFO is the MPI info object to be used for file open as
 *              defined in MPI_FILE_OPEN of MPI-2. This function does not
 *              make a duplicated info. Any modification to info after
 *              this function call returns may have an indeterminate effect
 *              on the access property list. Users should not modify the
 *              info while it is defined in a property list.
 * Return:      Success:    SUCCEED
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
herr_t
H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Info info)
{
    H5FD_fphdf5_fapl_t  driver_info;    /* Record handed to the property list */
    H5P_genplist_t     *fapl;           /* Property list behind FAPL_ID */
    herr_t              ret_value;

    FUNC_ENTER_API(H5Pset_fapl_fphdf5, FAIL);
    H5TRACE3("e","iMcMi",fapl_id,comm,info);

    /* The default list is rejected outright */
    if (H5P_DEFAULT == fapl_id) {
        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "can't set values in default property list");
    }

    /* FAPL_ID has to name a file access property list */
    fapl = H5P_object_verify(fapl_id,H5P_FILE_ACCESS);
    if (NULL == fapl) {
        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
    }

    /* Both handles are stored as given; neither is duplicated here */
    driver_info.comm = comm;
    driver_info.info = info;

    /* The result of installing the driver is this function's result */
    ret_value = H5P_set_driver(fapl, H5FD_FPHDF5, &driver_info);

done:
    FUNC_LEAVE_API(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5Pget_fapl_fphdf5
 * Purpose:     If the file access property list is set to the
 *              H5FD_FPHDF5 driver then this function returns the MPI
 *              communicator and information through the COMM and INFO
 *              pointers.
 * Return:      Success:    SUCCEED with the communicator and information
 *                          returned through the COMM and INFO arguments
 *                          if non-null. Neither piece of information is
 *                          copied and they are therefore valid only
 *                          until the file access property list is
 *                          modified or closed.
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
herr_t
H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm /*out*/, MPI_Info *info /*out*/)
{
    H5FD_fphdf5_fapl_t *driver_info;    /* Record stored in the property list */
    H5P_genplist_t     *fapl;           /* Property list behind FAPL_ID */
    herr_t              ret_value = SUCCEED;

    FUNC_ENTER_API(H5Pget_fapl_fphdf5, FAIL);
    H5TRACE3("e","ixx",fapl_id,comm,info);

    /* FAPL_ID has to name a file access property list ... */
    fapl = H5P_object_verify(fapl_id, H5P_FILE_ACCESS);
    if (NULL == fapl) {
        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
    }

    /* ... which is set up for this driver ... */
    if (H5FD_FPHDF5 != H5P_get_driver(fapl)) {
        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver");
    }

    /* ... and actually carries driver information */
    driver_info = H5P_get_driver_info(fapl);
    if (NULL == driver_info) {
        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info");
    }

    /* Each output is optional; the stored handles are returned uncopied */
    if (NULL != comm) {
        *comm = driver_info->comm;
    }
    if (NULL != info) {
        *info = driver_info->info;
    }

done:
    FUNC_LEAVE_API(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_communicator
 * Purpose:     Returns the MPI communicator for the file.
 * Return:      Success:    The communicator
 *              Failure:    MPI_COMM_NULL
 * Programmer:  Bill Wendling
 *              30. January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
MPI_Comm
H5FD_fphdf5_communicator(H5FD_t *_file)
{
    H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
    MPI_Comm       ret_value;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_communicator, MPI_COMM_NULL);

    /* The handle must be non-NULL and belong to this driver */
    assert(file);
    assert(file->pub.driver_id == H5FD_FPHDF5);

    /* Hand back the communicator recorded when the file was opened */
    ret_value = file->comm;

done:
    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_mpi_rank
 * Purpose:     Returns the MPI rank for a process
 * Return:      Success:    MPI rank
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
int
H5FD_fphdf5_mpi_rank(H5FD_t *_file)
{
    H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
    int            ret_value;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_rank, FAIL);

    /* The handle must be non-NULL and belong to this driver */
    assert(file);
    assert(file->pub.driver_id == H5FD_FPHDF5);

    /* Rank recorded in the file structure */
    ret_value = file->mpi_rank;

done:
    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_mpi_size
 * Purpose:     Returns the number of MPI processes
 * Return:      Success:    Number of MPI processes
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30. January 2003
 * Modifications:
 *-------------------------------------------------------------------------
 */
int
H5FD_fphdf5_mpi_size(H5FD_t *_file)
{
    H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
    int            ret_value;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_size, FAIL);

    /* The handle must be non-NULL and belong to this driver */
    assert(file);
    assert(file->pub.driver_id == H5FD_FPHDF5);

    /* Process count recorded in the file structure */
    ret_value = file->mpi_size;

done:
    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_setup
 * Purpose:     Set the buffer type BTYPE, file type FTYPE for a data
 *              transfer. Also request an MPI type transfer.
 * Return:      Success:    SUCCEED
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003
 * Modifications:
 *      The three temporary properties are installed all-or-nothing: when a
 *      later insertion fails, the ones already added are removed again so
 *      that the list is left as it was found.
 *-------------------------------------------------------------------------
 */
herr_t
H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype,
                  unsigned use_view)
{
    H5P_genplist_t *plist = NULL;       /* Property list behind DXPL_ID */
    unsigned        n_inserted = 0;     /* Properties added so far, in insertion order */
    herr_t          ret_value = SUCCEED;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_setup, FAIL);

    /* Check arguments */
    if ((plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)) == NULL)
        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list");

    /* Set buffer MPI type */
    if (H5P_insert(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,
                   H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE, &btype,
                   NULL, NULL, NULL, NULL, NULL) < 0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
    n_inserted = 1;

    /* Set file MPI type */
    if (H5P_insert(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,
                   H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE, &ftype,
                   NULL, NULL, NULL, NULL, NULL) < 0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
    n_inserted = 2;

    /* Set 'use view' property */
    if (H5P_insert(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME,
                   H5FD_FPHDF5_XFER_USE_VIEW_SIZE, &use_view,
                   NULL, NULL, NULL, NULL, NULL) < 0)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");

done:
    /*
     * Roll back a partial setup. Without this, a property left behind by a
     * failed call stays in the list and the next setup on the same list
     * fails when it tries to insert that name again. The removals are
     * best-effort: the original failure is what gets reported.
     */
    if (ret_value < 0 && n_inserted > 0) {
        if (n_inserted > 1)
            (void)H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME);
        (void)H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME);
    }

    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_teardown
 * Purpose:     Remove the temporary MPI-I/O properties from dxpl.
 * Return:      Success:    SUCCEED
 *              Failure:    FAIL
 * Programmer:  Bill Wendling
 *              30.
January 2003 * Modifications: *------------------------------------------------------------------------- */ herr_t H5FD_fphdf5_teardown(hid_t dxpl_id) { H5P_genplist_t *plist; herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(H5FD_fphdf5_teardown, FAIL); /* Check arguments */ if ((plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)) == NULL) HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list"); /* Remove buffer MPI type */ if (H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property"); /* Remove file MPI type */ if (H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property"); /* Remove 'use view' property */ if (H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property"); done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_fapl_get * * Purpose: Returns a file access property list which could be used to * create another file the same as this one. * * Return: Success: Ptr to new file access property list with all * fields copied from the file pointer. * * Failure: NULL * * Programmer: Robb Matzke * Friday, August 13, 1999 * * Modifications: * *------------------------------------------------------------------------- */ static void * H5FD_fphdf5_fapl_get(H5FD_t *_file) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; H5FD_fphdf5_fapl_t *fa = NULL; void *ret_value; FUNC_ENTER_NOAPI(H5FD_fphdf5_fapl_get, NULL); /* check args */ assert(file); assert(file->pub.driver_id == H5FD_FPHDF5); if ((fa = H5MM_calloc(sizeof(H5FD_fphdf5_fapl_t))) == NULL) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed"); /* These should both be copied. 
--rpm, 1999-08-13 */ fa->comm = file->comm; fa->info = file->info; /* Set return value */ ret_value = fa; done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_open * Purpose: Opens a file with name NAME. The FLAGS are a bit field with * purpose similar to the second argument of open(2) and * which are defined in H5Fpublic.h. The file access * property list FAPL_ID contains the properties driver * properties and MAXADDR is the largest address which this * file will be expected to access. This is collective. * Return: Success: A new file pointer. * Failure: NULL * Programmer: Bill Wendling * 05. February 2003 * Modifications: *------------------------------------------------------------------------- */ static H5FD_t * H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) { H5FP_fphdf5_t *file = NULL; MPI_File fh; int mpi_amode; int mrc; MPI_Offset size; const H5FD_fphdf5_fapl_t *fa = NULL; H5FD_fphdf5_fapl_t _fa; H5P_genplist_t *plist; H5FD_t *ret_value = NULL; unsigned file_id; unsigned req_id; MPI_Status status; /* Flag to indicate that the file was successfully opened */ unsigned file_opened = FALSE; FUNC_ENTER_NOAPI(H5FD_fphdf5_open, NULL); /* check args */ assert(name); /* Obtain a pointer to mpio-specific file access properties */ if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list"); if (fapl_id == H5P_FILE_ACCESS_DEFAULT || H5P_get_driver(plist) != H5FD_FPHDF5) { _fa.comm = MPI_COMM_SELF; /*default*/ _fa.info = MPI_INFO_NULL; /*default*/ fa = &_fa; } else { fa = H5P_get_driver_info(plist); assert(fa); } /* * Convert HDF5 flags to MPI-IO flags. Some combinations are illegal; * let MPI-IO figure it out */ mpi_amode = (flags & H5F_ACC_RDWR) ? 
MPI_MODE_RDWR : MPI_MODE_RDONLY; if (flags & H5F_ACC_CREAT) mpi_amode |= MPI_MODE_CREATE; if (flags & H5F_ACC_EXCL) mpi_amode |= MPI_MODE_EXCL; /* OKAY: CAST DISCARDS CONST */ if ((mrc = MPI_File_open(H5FP_SAP_COMM, (char *)name, mpi_amode, fa->info, &fh)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_File_open failed", mrc); file_opened = TRUE; if (H5FP_request_open(name, (int)strlen(name), H5FP_OBJ_FILE, maxaddr, &file_id, &req_id) == FAIL) HGOTO_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL, "can't inform SAP of file open"); HDmemset(&status, 0, sizeof(status)); /* Get the file ID from the SAP */ if (H5FP_my_rank == H5FP_capt_rank) if ((mrc = MPI_Recv(&file_id, 1, MPI_UNSIGNED, (int)H5FP_sap_rank, H5FP_TAG_FILE_ID, H5FP_SAP_COMM, &status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_Recv failed", mrc); /* Broadcast the file ID */ if ((mrc = MPI_Bcast(&file_id, 1, MPI_UNSIGNED, 0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc); /* The captain rank will get the filesize and broadcast it. */ if (H5FP_my_rank == H5FP_capt_rank) /* Get current file size */ if ((mrc = MPI_File_get_size(fh, &size)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mrc); /* Broadcast file-size */ if ((mrc = MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE, 0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc); /* Only if size > 0, truncate the file - if requested */ if (size && (flags & H5F_ACC_TRUNC)) { if ((mrc = MPI_File_set_size(fh, (MPI_Offset)0)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_File_set_size (file truncation) failed", mrc); /* Don't let any proc return until all have truncated the file. 
*/ if ((mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS) HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc); size = 0; } /* Build the return value and initialize it */ if ((file = H5MM_calloc(sizeof(H5FP_fphdf5_t))) == NULL) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed"); file->f = fh; file->comm = fa->comm; file->info = fa->info; file->mpi_rank = H5FP_my_rank; file->mpi_size = H5FP_comm_size; file->eof = H5FD_fphdf5_MPIOff_to_haddr(size); /* Set return value */ ret_value = (H5FD_t *)file; ret_value->fphdf5_id = file_id; /* the file descriptor used in FPHDF5 */ done: if (!ret_value && file_opened) MPI_File_close(&fh); FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_close * * Purpose: Closes a file. This is collective. * * Return: Success: Non-negative * * Failure: Negative * * Programmer: Unknown * January 30, 1998 * * Modifications: * Robb Matzke, 1998-02-18 * Added the ACCESS_PARMS argument. * * Robb Matzke, 1999-08-06 * Modified to work with the virtual file layer. *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_close(H5FD_t *_file) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; int mpi_code; /* mpi return code */ herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5FD_fphdf5_close, FAIL); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); /* MPI_File_close sets argument to MPI_FILE_NULL */ if (MPI_SUCCESS != (mpi_code=MPI_File_close(&(file->f)/*in,out*/))) HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code); /* Clean up other stuff */ H5MM_xfree(file); done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_query * * Purpose: Set the flags that this VFL driver is capable of supporting. 
*              (listed in H5FDpublic.h)
 *
 * Return:      Success:    non-negative
 *
 *              Failure:    negative
 *
 * Programmer:  Quincey Koziol
 *              Friday, August 25, 2000
 *
 * Modifications:
 *
 *-------------------------------------------------------------------------
 */
static herr_t
H5FD_fphdf5_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */)
{
    herr_t ret_value = SUCCEED;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_query, FAIL);

    /* FLAGS is optional; with a NULL pointer there is nothing to report */
    if (NULL != flags) {
        /*
         * Feature set of this driver:
         *
         *  - metadata allocations may be aggregated;
         *  - metadata may be accumulated for faster writes. Only the
         *    "write" flavour is advertised: this distinguishes updating
         *    the metadata accumulator on writes from updating it on reads,
         *    which is particularly (perhaps only, even) important for
         *    MPI-I/O where we guarantee that writes are collective, but
         *    reads may not be. If we were to allow the metadata
         *    accumulator to be written during a read operation, the
         *    application would hang;
         *  - "small" raw data allocations may be aggregated.
         */
        *flags = 0;
        *flags |= H5FD_FEAT_AGGREGATE_METADATA
                | H5FD_FEAT_ACCUMULATE_METADATA_WRITE
                | H5FD_FEAT_AGGREGATE_SMALLDATA;
    }

done:
    FUNC_LEAVE_NOAPI(ret_value);
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_get_eoa
 *
 * Purpose:     Gets the end-of-address marker for the file. The EOA marker
 *              is the first address past the last byte allocated in the
 *              format address space.
 *
 * Return:      Success:    The end-of-address marker.
* * Failure: HADDR_UNDEF * * Programmer: Robb Matzke * Friday, August 6, 1999 * * Modifications: * *------------------------------------------------------------------------- */ static haddr_t H5FD_fphdf5_get_eoa(H5FD_t *_file) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; haddr_t ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eoa, HADDR_UNDEF); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); /* Set return value */ ret_value=file->eoa; done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_set_eoa * * Purpose: Set the end-of-address marker for the file. This function is * called shortly after an existing HDF5 file is opened in order * to tell the driver where the end of the HDF5 data is located. * * Return: Success: 0 * * Failure: -1 * * Programmer: Robb Matzke * Friday, August 6, 1999 * * Modifications: * *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5FD_fphdf5_set_eoa, FAIL); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); file->eoa = addr; done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_get_eof * * Purpose: Gets the end-of-file marker for the file. The EOF marker * is the real size of the file. * * The MPIO driver doesn't bother keeping this field updated * since that's a relatively expensive operation. Fortunately * the library only needs the EOF just after the file is opened * in order to determine whether the file is empty, truncated, * or okay. Therefore, any MPIO I/O function will set its value * to HADDR_UNDEF which is the error return value of this * function. * * Return: Success: The end-of-address marker. 
* * Failure: HADDR_UNDEF * * Programmer: Robb Matzke * Friday, August 6, 1999 * * Modifications: * *------------------------------------------------------------------------- */ static haddr_t H5FD_fphdf5_get_eof(H5FD_t *_file) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; haddr_t ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eof, HADDR_UNDEF); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); /* Set return value */ ret_value=file->eof; done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_get_handle * * Purpose: Returns the file handle of MPIO file driver. * * Returns: Non-negative if succeed or negative if fails. * * Programmer: Raymond Lu * Sept. 16, 2002 * * Modifications: * *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t UNUSED fapl, void** file_handle) { H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file; herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(H5FD_fphdf5_get_handle, FAIL); if(!file_handle) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid"); *file_handle = &(file->f); done: FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_read * * Purpose: Reads SIZE bytes of data from FILE beginning at address ADDR * into buffer BUF according to data transfer properties in * DXPL_ID using potentially complex file and buffer types to * effect the transfer. * * Reading past the end of the MPI file returns zeros instead of * failing. MPI is able to coalesce requests from different * processes (collective or independent). * * Return: Success: Zero. Result is stored in caller-supplied * buffer BUF. * * Failure: -1, Contents of buffer BUF are undefined. * * Programmer: rky, 1998-01-30 * * Modifications: * Robb Matzke, 1998-02-18 * Added the ACCESS_PARMS argument. 
* * rky, 1998-04-10 * Call independent or collective MPI read, based on * ACCESS_PARMS. * * Albert Cheng, 1998-06-01 * Added XFER_MODE to control independent or collective MPI * read. * * rky, 1998-08-16 * Use BTYPE, FTYPE, and DISP from access parms. The guts of * H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a * single dual-purpose routine. * * Robb Matzke, 1999-04-21 * Changed XFER_MODE to XFER_PARMS for all H5F_*_read() * callbacks. * * Robb Matzke, 1999-07-28 * The ADDR argument is passed by value. * * Robb Matzke, 1999-08-06 * Modified to work with the virtual file layer. * * Quincey Koziol, 2002-05-14 * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type * for the I/O transfer. Someday we might include code to decode * the MPI type used for more complicated transfers and call * MPI_Get_count all the time. * * Quincey Koziol - 2002/06/17 * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use * the address of the dataset in MPI_File_set_view() calls, as * necessary. * * Quincey Koziol - 2002/06/24 * Removed "lazy" MPI_File_set_view() calls, since they would fail * if the first I/O was a collective I/O using MPI derived types * and the next I/O was an independent I/O. 
*
 *-------------------------------------------------------------------------
 */
/*
 * NOTE(review): everything between "#if 0" and "#else" below is compiled
 * out, so as built this callback returns SUCCEED without reading anything
 * and without touching BUF. The disabled body refers to H5FD_fphdf5_dxpl_t
 * and to H5FD_FPHDF5_INDEPENDENT / H5FD_FPHDF5_COLLECTIVE, none of which are
 * declared in the part of the file visible here -- presumably carried over
 * from the MPIO driver as a template; confirm before re-enabling it.
 */
static herr_t
H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t addr, size_t size, void *buf/*out*/)
{
#if 0
    H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
    const H5FD_fphdf5_dxpl_t *dx=NULL;
    H5FD_fphdf5_dxpl_t _dx;
    MPI_Offset mpi_off, mpi_disp;
    MPI_Status mpi_stat;
    int mpi_code; /* mpi return code */
    MPI_Datatype buf_type, file_type;
    int size_i, bytes_read, n;
    unsigned use_view_this_time=0;
    H5P_genplist_t *plist; /* Property list pointer */
    herr_t ret_value=SUCCEED;

    FUNC_ENTER_NOAPI(H5FD_fphdf5_read, FAIL);

    assert(file);
    assert(H5FD_FPHDF5==file->pub.driver_id);
    /* Make certain we have the correct type of property list */
    assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
    assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
    assert(buf);

    /* Portably initialize MPI status variable */
    HDmemset(&mpi_stat,0,sizeof(MPI_Status));

    /* some numeric conversions */
    if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off/*out*/)<0)
        HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off");
    /* MPI counts are ints: reject a SIZE that does not survive the narrowing */
    size_i = (int)size;
    if ((hsize_t)size_i != size)
        HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i");

    /* Obtain the data transfer properties */
    if(NULL == (plist = H5I_object(dxpl_id)))
        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
    if (H5FD_FPHDF5!=H5P_get_driver(plist)) {
        _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/
        dx = &_dx;
    } else {
        dx = H5P_get_driver_info(plist);
        assert(dx);
    }

    /*
     * Set up for a fancy xfer using complex types, or single byte block. We
     * wouldn't need to rely on the use_view field if MPI semantics allowed
     * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
     * could mean "use MPI_BYTE" by convention).
     */
    if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0)
        if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");

    if (use_view_this_time) {
        /* prepare for a full-blown xfer using btype, ftype, and disp */
        if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
        if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
            HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");

        /* When using types, use the address as the displacement for
         * MPI_File_set_view and reset the address for the read to zero
         */
        mpi_disp=mpi_off;
        mpi_off=0;
    } /* end if */
    else {
        /*
         * Prepare for a simple xfer of a contiguous block of bytes. The
         * btype, ftype, and disp fields are not used.
         */
        buf_type = MPI_BYTE;
        file_type = MPI_BYTE;
        mpi_disp = 0; /* mpi_off is alread set */
    } /* end else */

    /*
     * Set the file view when we are using MPI derived types
     */
    if (use_view_this_time) {
        /*OKAY: CAST DISCARDS CONST QUALIFIER*/
        if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info)))
            HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
    } /* end if */

    /* Read the data. */
    assert(H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode || H5FD_FPHDF5_COLLECTIVE==dx->xfer_mode);
    if (H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode) {
        if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
            HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code);
    } else {
        if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
            HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code);
    }

    /* KLUDGE, Robb Matzke, 2000-12-29
     * The LAM implementation of MPI_Get_count() says
     *    MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
     * So I'm commenting this out until it can be investigated. The
     * returned `bytes_written' isn't used anyway because of Kim's
     * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_write(). */

#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
    /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
     * Many systems don't support MPI_Get_count so we need to do a
     * configure thingy to fix this. */

    /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
     * had the 'buf_type' set to MPI_BYTE -QAK
     */
    if(use_view_this_time) {
        /* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
         * If this gets fixed (and MPI_Get_count() is reliable), the
         * kludge below where the 'bytes_read' value from MPI_Get_count() is
         * overwritten with the 'size_i' parameter can be removed. -QAK
         */
    } /* end if */
    else {
        /* How many bytes were actually read? */
        if (MPI_SUCCESS != (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_read)))
            HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code);
    } /* end else */
#endif /* H5_HAVE_MPI_GET_COUNT */

    /*
     * KLUGE rky 1998-02-02
     * MPI_Get_count incorrectly returns negative count; fake a complete
     * read.
     */
    bytes_read = size_i;

    /* Check for read failure */
    /* (with bytes_read forced to size_i just above, this test cannot fire) */
    if (bytes_read<0 || bytes_read>size_i)
        HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");

    /*
     * Reset the file view when we used MPI derived types
     */
    if (use_view_this_time) {
        /*OKAY: CAST DISCARDS CONST QUALIFIER*/
        if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
            HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
    } /* end if */

    /*
     * This gives us zeroes beyond end of physical MPI file. What about
     * reading past logical end of HDF5 file???
     */
    if ((n=(size_i-bytes_read)) > 0) {
        if (use_view_this_time) {
            /*
             * INCOMPLETE rky 1998-09-18
             * Haven't implemented reading zeros beyond EOF. What to do???
             */
            HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed");
        } else {
            memset((char*)buf+bytes_read, 0, (size_t)n);
        }
    }

done:
    FUNC_LEAVE_NOAPI(ret_value);
#else
    /* Stub path actually compiled: report success, leave BUF untouched */
    return SUCCEED;
#endif
}

/*-------------------------------------------------------------------------
 * Function:    H5FD_fphdf5_write
 *
 * Purpose:     Writes SIZE bytes of data to FILE beginning at address ADDR
 *              from buffer BUF according to data transfer properties in
 *              DXPL_ID using potentially complex file and buffer types to
 *              effect the transfer.
 *
 *              MPI is able to coalesce requests from different processes
 *              (collective and independent).
 *
 * Return:      Success:    Zero. USE_TYPES and OLD_USE_TYPES in the
 *                          access params are altered.
 *
 *              Failure:    -1, USE_TYPES and OLD_USE_TYPES in the
 *                          access params may be altered.
 *
 * Programmer:  Unknown
 *              January 30, 1998
 *
 * Modifications:
 *              rky, 1998-08-28
 *              If the file->allsame flag is set, we assume that all the
 *              procs in the relevant MPI communicator will write identical
 *              data at identical offsets in the file, so only proc 0 will
 *              write, and all other procs will wait for p0 to finish. This
 *              is useful for writing metadata, for example. Note that we
 *              don't _check_ that the data is identical.
Also, the mechanism * we use to eliminate the redundant writes is by requiring a * call to H5FD_fphdf5_tas_allsame before the write, which is * rather klugey. Would it be better to pass a parameter to * low-level writes like H5F_block_write and H5F_low_write, * instead? Or...??? Also, when I created this mechanism I * wanted to minimize the difference in behavior between the old * way of doing things (i.e., all procs write) and the new way, * so the writes are eliminated at the very lowest level, here * in H5FD_fphdf5_write. It may be better to rethink that, and * short-circuit the writes at a higher level (e.g., at the * points in the code where H5FD_fphdf5_tas_allsame is called). * * * Robb Matzke, 1998-02-18 * Added the ACCESS_PARMS argument. * * rky, 1998-04-10 * Call independent or collective MPI write, based on * ACCESS_PARMS. * * rky, 1998-04-24 * Removed redundant write from H5FD_fphdf5_write. * * Albert Cheng, 1998-06-01 * Added XFER_MODE to control independent or collective MPI * write. * * rky, 1998-08-16 * Use BTYPE, FTYPE, and DISP from access parms. The guts of * H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a * single dual-purpose routine. * * rky, 1998-08-28 * Added ALLSAME parameter to make all but proc 0 skip the * actual write. * * Robb Matzke, 1999-04-21 * Changed XFER_MODE to XFER_PARMS for all H5FD_*_write() * callbacks. * * Robb Matzke, 1999-07-28 * The ADDR argument is passed by value. * * Robb Matzke, 1999-08-06 * Modified to work with the virtual file layer. * * Albert Cheng, 1999-12-19 * When only-p0-write-allsame-data, p0 Bcasts the * ret_value to other processes. This prevents * a racing condition (that other processes try to * read the file before p0 finishes writing) and also * allows all processes to report the same ret_value. * * Kim Yates, Pat Weidhaas, 2000-09-26 * Move block of coding where only p0 writes after the * MPI_File_set_view call. 
* * Quincey Koziol, 2002-05-10 * Instead of always writing metadata from process 0, spread the * burden among all the processes by using a round-robin rotation * scheme. * * Quincey Koziol, 2002-05-10 * Removed allsame code, keying off the type parameter instead. * * Quincey Koziol, 2002-05-14 * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type * for the I/O transfer. Someday we might include code to decode * the MPI type used for more complicated transfers and call * MPI_Get_count all the time. * * Quincey Koziol - 2002/06/17 * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use * the address of the dataset in MPI_File_set_view() calls, as * necessary. * * Quincey Koziol - 2002/06/24 * Removed "lazy" MPI_File_set_view() calls, since they would fail * if the first I/O was a collective I/O using MPI derived types * and the next I/O was an independent I/O. * * Quincey Koziol - 2002/07/18 * Added "block_before_meta_write" dataset transfer flag, which * is set during writes from a metadata cache flush and indicates * that all the processes must sync up before (one of them) * writing metadata. 
* *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf) { #if 0 H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; const H5FD_fphdf5_dxpl_t *dx=NULL; H5FD_fphdf5_dxpl_t _dx; MPI_Offset mpi_off, mpi_disp; MPI_Status mpi_stat; MPI_Datatype buf_type, file_type; int mpi_code; /* MPI return code */ int size_i, bytes_written; unsigned use_view_this_time=0; unsigned block_before_meta_write=0; /* Whether to block before a metadata write */ H5P_genplist_t *plist; /* Property list pointer */ herr_t ret_value=SUCCEED; FUNC_ENTER_NOAPI(H5FD_fphdf5_write, FAIL); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); /* Make certain we have the correct type of property list */ assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id)); assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER)); assert(buf); /* Portably initialize MPI status variable */ HDmemset(&mpi_stat,0,sizeof(MPI_Status)); /* some numeric conversions */ if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off"); size_i = (int)size; if ((hsize_t)size_i != size) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i"); /* Obtain the data transfer properties */ if(NULL == (plist = H5I_object(dxpl_id))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list"); if (H5FD_FPHDF5!=H5P_get_driver(plist)) { _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/ dx = &_dx; } else { dx = H5P_get_driver_info(plist); assert(dx); } /* * Set up for a fancy xfer using complex types, or single byte block. We * wouldn't need to rely on the use_view field if MPI semantics allowed * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which * could mean "use MPI_BYTE" by convention). 
*/ if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0) if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); if (use_view_this_time) { /* prepare for a full-blown xfer using btype, ftype, and disp */ if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); /* When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ mpi_disp=mpi_off; mpi_off=0; } /* end if */ else { /* * Prepare for a simple xfer of a contiguous block of bytes. * The btype, ftype, and disp fields are not used. */ buf_type = MPI_BYTE; file_type = MPI_BYTE; mpi_disp = 0; /* mpi_off is already set */ } /* end else */ /* * Set the file view when we are using MPI derived types */ if (use_view_this_time) { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); } /* end if */ /* Metadata specific actions */ if(type!=H5FD_MEM_DRAW) { /* Check if we need to syncronize all processes before attempting metadata write * (Prevents race condition where the process writing the metadata goes ahead * and writes the metadata to the file before all the processes have * read the data, "transmitting" data from the "future" to the reading * process. 
-QAK ) */ if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0) if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property"); if(block_before_meta_write) if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); /* Only p will do the actual write if all procs in comm write same metadata */ if (H5_fphdf5_1_metawrite_g) { if (file->mpi_rank != file->mpi_round) { HGOTO_DONE(SUCCEED) /* skip the actual write */ } } } /* end if */ /* Write the data. */ assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode); if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code); } else { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code); } /* KLUDGE, Robb Matzke, 2000-12-29 * The LAM implementation of MPI_Get_count() says * MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD) * So I'm commenting this out until it can be investigated. The * returned `bytes_written' isn't used anyway because of Kim's * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_read(). */ #ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/ /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11. * Many systems don't support MPI_Get_count so we need to do a * configure thingy to fix this. */ /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually * had the 'buf_type' set to MPI_BYTE -QAK */ if(use_view_this_time) { /* Figure out the mapping from the MPI 'buf_type' to bytes, someday... 
* If this gets fixed (and MPI_Get_count() is reliable), the * kludge below where the 'bytes_written' value from MPI_Get_count() is * overwritten with the 'size_i' parameter can be removed. -QAK */ } /* end if */ else { /* How many bytes were actually written? */ if (MPI_SUCCESS!= (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_written))) HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code); } /* end else */ #endif /* H5_HAVE_MPI_GET_COUNT */ /* * KLUGE rky, 1998-02-02 * MPI_Get_count incorrectly returns negative count; fake a complete * write. */ bytes_written = size_i; /* Check for write failure */ if (bytes_written<0 || bytes_written>size_i) HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed"); /* * Reset the file view when we used MPI derived types */ if (use_view_this_time) { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); } /* end if */ /* Forget the EOF value (see H5FD_fphdf5_get_eof()) --rpm 1999-08-06 */ file->eof = HADDR_UNDEF; done: /* Guard against getting into metadate broadcast in failure cases */ if(ret_value!=FAIL) { /* if only p writes, need to broadcast the ret_value to other processes */ if ((type!=H5FD_MEM_DRAW) && H5_fphdf5_1_metawrite_g) { if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, file->mpi_round, file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); /* Round-robin rotate to the next process */ file->mpi_round = (++file->mpi_round)%file->mpi_size; } /* end if */ } /* end if */ FUNC_LEAVE_NOAPI(ret_value); #else return SUCCEED; #endif } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_flush * * Purpose: Makes sure that all data is on disk. This is collective. 
* * Return: Success: Non-negative * * Failure: Negative * * Programmer: Unknown * January 30, 1998 * * Modifications: * Robb Matzke, 1998-02-18 * Added the ACCESS_PARMS argument. * * Robb Matzke, 1999-08-06 * Modified to work with the virtual file layer. * * Robb Matzke, 2000-12-29 * Make sure file size is at least as large as the last * allocated byte. * * Quincey Koziol, 2002-06-?? * Changed file extension method to use MPI_File_set_size instead * read->write method. * *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing) { #if 0 H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; int mpi_code; /* mpi return code */ MPI_Offset mpi_off; herr_t ret_value=SUCCEED; #ifdef OLD_WAY uint8_t byte=0; MPI_Status mpi_stat; #endif /* OLD_WAY */ FUNC_ENTER_NOAPI(H5FD_fphdf5_flush, FAIL); assert(file); assert(H5FD_FPHDF5==file->pub.driver_id); #ifdef OLD_WAY /* Portably initialize MPI status variable */ HDmemset(&mpi_stat,0,sizeof(MPI_Status)); #endif /* OLD_WAY */ /* Extend the file to make sure it's large enough, then sync. 
* Unfortunately, keeping track of EOF is an expensive operation, so * we can't just check whether EOFeoa>file->last_eoa) { #ifdef OLD_WAY if (0==file->mpi_rank) { if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset"); if (MPI_SUCCESS != (mpi_code=MPI_File_read_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code); if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code); } /* end if */ #else /* OLD_WAY */ if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset"); /* Extend the file's size */ if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code); /* Don't let any proc return until all have extended the file. * (Prevents race condition where some processes go ahead and write * more data to the file before all the processes have finished making * it the shorter length, potentially truncating the file and dropping * the new data written) */ if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); #endif /* OLD_WAY */ /* Update the 'last' eoa value */ file->last_eoa=file->eoa; } /* end if */ /* Only sync the file if we are not going to immediately close it */ if(!closing) { if (MPI_SUCCESS != (mpi_code=MPI_File_sync(file->f))) HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code); } /* end if */ done: FUNC_LEAVE_NOAPI(ret_value); #else return SUCCEED; #endif } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_MPIOff_to_haddr * Purpose: Convert an MPI_Offset value to haddr_t. * Return: Success: The haddr_t equivalent of the MPI_OFF argument. 
* Failure: HADDR_UNDEF * Programmer: Bill Wendling * 30. January 2003 * Modifications: *------------------------------------------------------------------------- */ static haddr_t H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off) { haddr_t ret_value; FUNC_ENTER_NOINIT(H5FD_fphdf5_MPIOff_to_haddr); ret_value = (mpi_off != (MPI_Offset)(haddr_t)mpi_off ? HADDR_UNDEF : (haddr_t)mpi_off); FUNC_LEAVE_NOAPI(ret_value); } /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_haddr_to_MPIOff * Purpose: Convert an haddr_t value to MPI_Offset. * Return: Success: Non-negative, the MPI_OFF argument contains * the converted value. * Failure: FAIL, MPI_OFF is undefined. * Programmer: Bill Wendling * 30. January 2003 * Modifications: *------------------------------------------------------------------------- */ static herr_t H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off /*out*/) { herr_t ret_value = FAIL; FUNC_ENTER_NOINIT(H5FD_fphdf5_haddr_to_MPIOff); if (mpi_off) *mpi_off = (MPI_Offset)addr; ret_value = (addr != (haddr_t)(MPI_Offset)addr ? FAIL : SUCCEED); FUNC_LEAVE_NOAPI(ret_value); } #endif /* H5_HAVE_FPHDF5 */