From c7cce26e6ac7dee24d04bd3f7fdad864b156016a Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 6 Feb 2003 17:08:05 -0500 Subject: [svn-r6379] Purpose: Update Description: H5FP.c, H5FPclient.c, H5FPprivate.h, H5FPserver.c: Update. More progression towards the SAP as metadata cache. It only lacks the ability to take care of metadata allocations. H5FDfphdf5.[ch]: Start of a new driver for FPHDF5. Not fully implemented just yet... Platforms tested: Linux --- src/H5FDfphdf5.c | 1589 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/H5FDfphdf5.h | 62 +++ src/H5FP.c | 2 +- src/H5FPclient.c | 87 +-- src/H5FPprivate.h | 17 +- src/H5FPserver.c | 90 ++- 6 files changed, 1747 insertions(+), 100 deletions(-) create mode 100644 src/H5FDfphdf5.c create mode 100644 src/H5FDfphdf5.h diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c new file mode 100644 index 0000000..689101b --- /dev/null +++ b/src/H5FDfphdf5.c @@ -0,0 +1,1589 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. 
*
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+#include "H5private.h"      /* Library functions                 */
+#include "H5ACprivate.h"    /* Metadata cache                    */
+#include "H5Eprivate.h"     /* Error handling                    */
+#include "H5Fprivate.h"     /* Files                             */
+#include "H5FDprivate.h"    /* File driver                       */
+#include "H5FDfphdf5.h"     /* Flexible PHDF5 I/O file driver    */
+#include "H5Iprivate.h"     /* Object IDs                        */
+#include "H5MMprivate.h"    /* Memory allocation                 */
+#include "H5Pprivate.h"     /* Property lists                    */
+
+#ifdef H5_HAVE_FPHDF5
+
+#include "H5FPprivate.h"    /* Flexible PHDF5                    */
+
+/*
+ * The driver identification number, initialized at runtime if
+ * H5_HAVE_FPHDF5 is defined. This allows applications to still have
+ * the H5FD_FPHDF5 "constants" in their source code (it also makes this
+ * file strictly ANSI compliant when H5_HAVE_FPHDF5 isn't defined)
+ *
+ * It starts out as 0 and is overwritten by H5FD_fphdf5_init() with the
+ * value returned from H5FDregister().
+ */
+static hid_t H5FD_FPHDF5_g = 0;
+
+/*
+ * The description of a file belonging to this driver.
+ *
+ * The driver callbacks receive an H5FD_t pointer and cast it straight to
+ * this structure, which is why `pub' has to stay the first member.
+ *
+ * The EOF value is only used just after the file is opened in order for
+ * the library to determine whether the file is empty, truncated, or
+ * okay. H5FD_fphdf5_open() fills it in once from the broadcast file
+ * size; none of the callbacks visible in this part of the file update
+ * it afterwards.
+ *
+ * NOTE(review): the type carries an H5FP_ prefix although every other
+ * identifier of this driver uses H5FD_fphdf5_ -- presumably a slip;
+ * confirm before renaming.
+ */
+typedef struct H5FP_fphdf5_t {
+    H5FD_t      pub;        /*Public stuff, must be first (ick!)             */
+    MPI_File    f;          /*MPIO file handle                               */
+    MPI_Comm    comm;       /*Communicator                                   */
+    MPI_Info    info;       /*File information                               */
+    int         mpi_rank;   /*This process's rank                            */
+    int         mpi_size;   /*Total number of processes                      */
+    int         mpi_round;  /*Current round robin process (for metadata I/O);
+                             * not referenced by the code visible here       */
+    haddr_t     eof;        /*End-of-file marker                             */
+    haddr_t     eoa;        /*End-of-address marker                          */
+    haddr_t     last_eoa;   /*Last known end-of-address marker; not
+                             * referenced by the code visible here           */
+} H5FP_fphdf5_t;
+
+/*
+ * Prototypes
+ *
+ * Address conversion helpers; their definitions are not part of the
+ * portion of the file shown here.
+ */
+static haddr_t H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off);
+static herr_t H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off);
+
+/*
+ * Callbacks
+ *
+ * These are the functions plugged into the H5FD_class_t table below.
+ */
+static void *H5FD_fphdf5_fapl_get(H5FD_t *_file);
+static H5FD_t *H5FD_fphdf5_open(const char *name, unsigned flags,
+                                hid_t fapl_id, haddr_t maxaddr);
+static herr_t H5FD_fphdf5_close(H5FD_t *_file);
+static herr_t H5FD_fphdf5_query(const H5FD_t *_f1, unsigned long *flags);
+static haddr_t H5FD_fphdf5_get_eoa(H5FD_t *_file);
+static herr_t H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr);
+static haddr_t H5FD_fphdf5_get_eof(H5FD_t *_file);
+static herr_t H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t fapl,
+                                     void **file_handle);
+static herr_t H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id,
+                               haddr_t addr, size_t size, void *buf);
+static herr_t H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id,
+                                haddr_t addr, size_t size, const void *buf);
+static herr_t H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing);
+
+/*
+ * FPHDF5-specific file access properties
+ *
+ * This is the driver-info blob stored in a file access property list by
+ * H5Pset_fapl_fphdf5() and read back by H5Pget_fapl_fphdf5(). Both
+ * members are stored by plain assignment, i.e. the MPI objects are not
+ * duplicated.
+ */
+typedef struct H5FD_fphdf5_fapl_t {
+    MPI_Comm    comm;       /*communicator                  */
+    MPI_Info    info;       /*file information              */
+} H5FD_fphdf5_fapl_t;
+
+/*
+ * The FPHDF5 file driver information
+ *
+ * Slots left NULL are callbacks this driver does not provide; the
+ * trailing comment on each row names the H5FD_class_t member it fills.
+ */
+static const H5FD_class_t H5FD_fphdf5_g = {
+    "fphdf5",                       /*name          */
+    HADDR_MAX,                      /*maxaddr       */
+    H5F_CLOSE_SEMI,                 /*fc_degree     */
+    NULL,                           /*sb_size       */
+    NULL,                           /*sb_encode     */
+    NULL,                           /*sb_decode     */
+    sizeof(H5FD_fphdf5_fapl_t),     /*fapl_size     */
+    H5FD_fphdf5_fapl_get,           /*fapl_get      */
+    NULL,                           /*fapl_copy     */
+    NULL,                           /*fapl_free     */
+    0,                              /*dxpl_size     */
+    NULL,                           /*dxpl_copy     */
+    NULL,                           /*dxpl_free     */
+    H5FD_fphdf5_open,               /*open          */
+    H5FD_fphdf5_close,              /*close         */
+    NULL,                           /*cmp           */
+    H5FD_fphdf5_query,              /*query         */
+    NULL,                           /*alloc         */
+    NULL,                           /*free          */
+    H5FD_fphdf5_get_eoa,            /*get_eoa       */
+    H5FD_fphdf5_set_eoa,            /*set_eoa       */
+    H5FD_fphdf5_get_eof,            /*get_eof       */
+    H5FD_fphdf5_get_handle,         /*get_handle    */
+    H5FD_fphdf5_read,               /*read          */
+    H5FD_fphdf5_write,              /*write         */
+    H5FD_fphdf5_flush,              /*flush         */
+    H5FD_FLMAP_SINGLE,              /*fl_map        */
+};
+
+/*
+ * Global var to allow elimination of redundant metadata writes to be
+ * controlled by the value of an environment variable.
+ *
+ * Use the elimination by default unless this is the Intel Red machine
+ *
+ * NOTE(review): only the compile-time default is set here; the code
+ * that would consult an environment variable is not in the portion of
+ * the file shown -- confirm where (or whether) that happens.
+ */
+#ifndef __PUMAGON__
+hbool_t H5_fphdf5_1_metawrite_g = TRUE;
+#else
+hbool_t H5_fphdf5_1_metawrite_g = FALSE;
+#endif
+
+/* Interface initialization */
+#define PABLO_MASK      H5FD_fphdf5_mask
+#define INTERFACE_INIT  H5FD_fphdf5_init
+
+static int interface_initialize_g = 0;
+
+/* ======== Temporary, Local data transfer properties ======== */
+/*
+ * Definitions for memory MPI type property
+ * (inserted by H5FD_fphdf5_setup(), removed by H5FD_fphdf5_teardown())
+ */
+#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME      "H5FD_fphdf5_mem_mpi_type"
+#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE      sizeof(MPI_Datatype)
+
+/*
+ * Definitions for file MPI type property
+ * (inserted by H5FD_fphdf5_setup(), removed by H5FD_fphdf5_teardown())
+ */
+#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME     "H5FD_fphdf5_file_mpi_type"
+#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE     sizeof(MPI_Datatype)
+
+/*
+ * Definitions for whether to use MPI types property
+ * (inserted by H5FD_fphdf5_setup(), removed by H5FD_fphdf5_teardown())
+ */
+#define H5FD_FPHDF5_XFER_USE_VIEW_NAME          "H5FD_fphdf5_use_view"
+#define H5FD_FPHDF5_XFER_USE_VIEW_SIZE          sizeof(unsigned)
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_init
+ * Purpose:     Initialize this driver by registering the driver with the
+ *              library.
+ * Return:      Success:    The driver ID for the FPHDF5 driver.
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. 
January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+hid_t
+H5FD_fphdf5_init(void)
+{
+    hid_t ret_value;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_init, FAIL);
+
+    /*
+     * Register the class table only while the cached ID does not (yet)
+     * name a VFL driver; otherwise the cached ID is handed back as is.
+     */
+    if (H5I_VFL != H5Iget_type(H5FD_FPHDF5_g)) {
+        H5FD_FPHDF5_g = H5FDregister(&H5FD_fphdf5_g);
+    }
+
+    /* Whatever is cached now -- including a failed registration -- is the result */
+    ret_value = H5FD_FPHDF5_g;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5Pset_fapl_fphdf5
+ * Purpose:     Store the user supplied MPIO communicator COMM and INFO
+ *              in the file access property list FAPL_ID which can then
+ *              be used to create and/or open the file. This function is
+ *              available only in the parallel HDF5 library and is not
+ *              collective.
+ *
+ *              COMM is the MPI communicator to be used for file open as
+ *              defined in MPI_FILE_OPEN of MPI-2. This function does not
+ *              make a duplicated communicator. Any modification to COMM
+ *              after this function call returns may have an indeterminate
+ *              effect on the access property list. Users should not
+ *              modify the communicator while it is defined in a property
+ *              list.
+ *
+ *              INFO is the MPI info object to be used for file open as
+ *              defined in MPI_FILE_OPEN of MPI-2. This function does not
+ *              make a duplicated info. Any modification to info after
+ *              this function call returns may have an indeterminate effect
+ *              on the access property list. Users should not modify the
+ *              info while it is defined in a property list.
+ * Return:      Success:    SUCCEED
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. 
January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Info info)
+{
+    H5P_genplist_t      *fapl;          /* property list behind FAPL_ID     */
+    H5FD_fphdf5_fapl_t   drv_info;      /* driver info handed to the list   */
+    herr_t               ret_value;
+
+    FUNC_ENTER_API(H5Pset_fapl_fphdf5, FAIL);
+    H5TRACE3("e","iMcMi",fapl_id,comm,info);
+
+    /* The default list is read-only as far as this call is concerned */
+    if (H5P_DEFAULT == fapl_id) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL,
+                    "can't set values in default property list");
+    }
+
+    /* FAPL_ID has to name a file access property list */
+    fapl = H5P_object_verify(fapl_id, H5P_FILE_ACCESS);
+    if (NULL == fapl) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
+    }
+
+    /* Both MPI objects are stored as given; nothing is duplicated here */
+    drv_info.info = info;
+    drv_info.comm = comm;
+
+    /* The outcome of installing the driver is the outcome of this call */
+    ret_value = H5P_set_driver(fapl, H5FD_FPHDF5, &drv_info);
+
+done:
+    FUNC_LEAVE_API(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5Pget_fapl_fphdf5
+ * Purpose:     If the file access property list is set to the
+ *              H5FD_FPHDF5 driver then this function returns the MPI
+ *              communicator and information through the COMM and INFO
+ *              pointers.
+ * Return:      Success:    SUCCEED with the communicator and information
+ *                          returned through the COMM and INFO arguments
+ *                          if non-null. Neither piece of information is
+ *                          copied and they are therefore valid only
+ *                          until the file access property list is
+ *                          modified or closed.
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. 
January 2003 + * Modifications: + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm /*out*/, MPI_Info *info /*out*/) +{ + H5FD_fphdf5_fapl_t *fa; + H5P_genplist_t *plist; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_API(H5Pget_fapl_fphdf5, FAIL); + H5TRACE3("e","ixx",fapl_id,comm,info); + + if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL) + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list"); + + if (H5P_get_driver(plist) != H5FD_FPHDF5) + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver"); + + if ((fa = H5P_get_driver_info(plist)) == NULL) + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info"); + + if (comm) + *comm = fa->comm; + + if (info) + *info = fa->info; + +done: + FUNC_LEAVE_API(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_communicator + * Purpose: Returns the MPI communicator for the file. + * Return: Success: The communicator + * Failure: NULL + * Programmer: Bill Wendling + * 30. January 2003 + * Modifications: + *------------------------------------------------------------------------- + */ +MPI_Comm +H5FD_fphdf5_communicator(H5FD_t *_file) +{ + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + MPI_Comm ret_value; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_communicator, MPI_COMM_NULL); + + /* check args */ + assert(file); + assert(file->pub.driver_id == H5FD_FPHDF5); + + /* Set return value */ + ret_value = file->comm; + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_mpi_rank + * Purpose: Returns the MPI rank for a process + * Return: Success: MPI rank + * Failure: FAIL + * Programmer: Bill Wendling + * 30. 
January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+int
+H5FD_fphdf5_mpi_rank(H5FD_t *_file)
+{
+    H5FP_fphdf5_t   *fp_file = (H5FP_fphdf5_t *)_file;
+    int              ret_value;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_rank, FAIL);
+
+    /* Sanity checks: a file, and one that belongs to this driver */
+    assert(fp_file);
+    assert(H5FD_FPHDF5 == fp_file->pub.driver_id);
+
+    /* The rank was recorded in the file structure when it was opened */
+    ret_value = fp_file->mpi_rank;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_mpi_size
+ * Purpose:     Returns the number of MPI processes
+ * Return:      Success:    Number of MPI processes
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+int
+H5FD_fphdf5_mpi_size(H5FD_t *_file)
+{
+    H5FP_fphdf5_t   *fp_file = (H5FP_fphdf5_t *)_file;
+    int              ret_value;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_size, FAIL);
+
+    /* Sanity checks: a file, and one that belongs to this driver */
+    assert(fp_file);
+    assert(H5FD_FPHDF5 == fp_file->pub.driver_id);
+
+    /* The process count was recorded in the file structure at open time */
+    ret_value = fp_file->mpi_size;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_setup
+ * Purpose:     Set the buffer type BTYPE, file type FTYPE for a data
+ *              transfer. Also request an MPI type transfer.
+ * Return:      Success:    SUCCEED
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. 
January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype,
+                  MPI_Datatype ftype, unsigned use_view)
+{
+    H5P_genplist_t  *dxpl;                  /* property list behind DXPL_ID */
+    herr_t           status;                /* result of each insertion     */
+    herr_t           ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_setup, FAIL);
+
+    /* DXPL_ID has to name a dataset transfer property list */
+    dxpl = H5P_object_verify(dxpl_id, H5P_DATASET_XFER);
+    if (NULL == dxpl) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list");
+    }
+
+    /* 1. memory (buffer) MPI datatype */
+    status = H5P_insert(dxpl, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,
+                        H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE, &btype,
+                        NULL, NULL, NULL, NULL, NULL);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
+    }
+
+    /* 2. file MPI datatype */
+    status = H5P_insert(dxpl, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,
+                        H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE, &ftype,
+                        NULL, NULL, NULL, NULL, NULL);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
+    }
+
+    /* 3. the flag telling the I/O routines to use the two types above */
+    status = H5P_insert(dxpl, H5FD_FPHDF5_XFER_USE_VIEW_NAME,
+                        H5FD_FPHDF5_XFER_USE_VIEW_SIZE, &use_view,
+                        NULL, NULL, NULL, NULL, NULL);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_teardown
+ * Purpose:     Remove the temporary MPI-I/O properties from dxpl.
+ * Return:      Success:    SUCCEED
+ *              Failure:    FAIL
+ * Programmer:  Bill Wendling
+ *              30. 
January 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5FD_fphdf5_teardown(hid_t dxpl_id)
+{
+    H5P_genplist_t  *dxpl;                  /* property list behind DXPL_ID */
+    herr_t           status;                /* result of each removal       */
+    herr_t           ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_teardown, FAIL);
+
+    /* DXPL_ID has to name a dataset transfer property list */
+    dxpl = H5P_object_verify(dxpl_id, H5P_DATASET_XFER);
+    if (NULL == dxpl) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list");
+    }
+
+    /*
+     * Take the three temporary properties out again, in the order in
+     * which H5FD_fphdf5_setup() put them in. The first failure ends the
+     * function, so later properties are then left in place.
+     */
+
+    /* 1. memory (buffer) MPI datatype */
+    status = H5P_remove(dxpl_id, dxpl, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property");
+    }
+
+    /* 2. file MPI datatype */
+    status = H5P_remove(dxpl_id, dxpl, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property");
+    }
+
+    /* 3. 'use view' flag */
+    status = H5P_remove(dxpl_id, dxpl, H5FD_FPHDF5_XFER_USE_VIEW_NAME);
+    if (status < 0) {
+        HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property");
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_fapl_get
+ *
+ * Purpose:     Returns a file access property list which could be used to
+ *              create another file the same as this one.
+ *
+ * Return:      Success:    Ptr to new file access property list with all
+ *                          fields copied from the file pointer. 
+ *
+ *              Failure:    NULL
+ *
+ * Programmer:  Robb Matzke
+ *              Friday, August 13, 1999
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_fphdf5_fapl_get(H5FD_t *_file)
+{
+    H5FP_fphdf5_t       *fp_file = (H5FP_fphdf5_t *)_file;
+    H5FD_fphdf5_fapl_t  *drv_info = NULL;   /* freshly allocated result */
+    void                *ret_value;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_fapl_get, NULL);
+
+    /* Sanity checks: a file, and one that belongs to this driver */
+    assert(fp_file);
+    assert(H5FD_FPHDF5 == fp_file->pub.driver_id);
+
+    /* The result lives on the heap; it is returned to the caller below */
+    drv_info = H5MM_calloc(sizeof(H5FD_fphdf5_fapl_t));
+    if (NULL == drv_info) {
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
+    }
+
+    /*
+     * Plain handle assignment -- the MPI objects themselves are shared
+     * with the file. (These should both be copied. --rpm, 1999-08-13)
+     */
+    drv_info->info = fp_file->info;
+    drv_info->comm = fp_file->comm;
+
+    ret_value = drv_info;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_open
+ * Purpose:     Opens a file with name NAME. The FLAGS are a bit field with
+ *              purpose similar to the second argument of open(2) and
+ *              which are defined in H5Fpublic.h. The file access
+ *              property list FAPL_ID contains the properties driver
+ *              properties and MAXADDR is the largest address which this
+ *              file will be expected to access. This is collective.
+ * Return:      Success:    A new file pointer.
+ *              Failure:    NULL
+ * Programmer:  Bill Wendling
+ *              05. 
February 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+static H5FD_t *
+H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
+{
+    H5FP_fphdf5_t              *file = NULL;    /* the new file structure        */
+    MPI_File                    fh;             /* handle from MPI_File_open     */
+    int                         mpi_amode;      /* MPI-IO access mode            */
+    int                         mrc;            /* MPI return code               */
+    MPI_Offset                  size = 0;       /* file size, set by the captain
+                                                 * and broadcast to everybody    */
+    const H5FD_fphdf5_fapl_t   *fa = NULL;      /* driver info actually used     */
+    H5FD_fphdf5_fapl_t          _fa;            /* defaults when FAPL has none   */
+    H5P_genplist_t             *plist;
+    H5FD_t                     *ret_value = NULL;
+    unsigned                    file_id;        /* file ID assigned by the SAP   */
+    unsigned                    req_id;
+    MPI_Status                  status;
+
+    /* Flag to indicate that the file was successfully opened */
+    unsigned                    file_opened = FALSE;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_open, NULL);
+
+    /* check args */
+    assert(name);
+
+    /* Obtain a pointer to the driver-specific file access properties */
+    if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list");
+
+    if (fapl_id == H5P_FILE_ACCESS_DEFAULT || H5P_get_driver(plist) != H5FD_FPHDF5) {
+        _fa.comm = MPI_COMM_SELF;   /*default*/
+        _fa.info = MPI_INFO_NULL;   /*default*/
+        fa = &_fa;
+    } else {
+        /*
+         * fa->info is dereferenced below, so missing driver info has to
+         * be a real error (as in H5Pget_fapl_fphdf5) and not just an
+         * assertion that disappears in NDEBUG builds.
+         */
+        if ((fa = H5P_get_driver_info(plist)) == NULL)
+            HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, NULL, "bad VFL driver info");
+    }
+
+    /*
+     * Convert HDF5 flags to MPI-IO flags. Some combinations are illegal;
+     * let MPI-IO figure it out
+     */
+    mpi_amode = (flags & H5F_ACC_RDWR) ? MPI_MODE_RDWR : MPI_MODE_RDONLY;
+
+    if (flags & H5F_ACC_CREAT)  mpi_amode |= MPI_MODE_CREATE;
+    if (flags & H5F_ACC_EXCL)   mpi_amode |= MPI_MODE_EXCL;
+
+    /*
+     * Note that the file is opened on H5FP_SAP_COMM; of the driver info
+     * only the INFO object takes part in the open.
+     */
+    /* OKAY: CAST DISCARDS CONST */
+    if ((mrc = MPI_File_open(H5FP_SAP_COMM, (char *)name, mpi_amode,
+                             fa->info, &fh)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(NULL, "MPI_File_open failed", mrc);
+
+    /* From here on the error path has to close FH again (see `done') */
+    file_opened = TRUE;
+
+    if (H5FP_request_open(name, (int)strlen(name), H5FP_OBJ_FILE, maxaddr,
+                          &file_id, &req_id) == FAIL)
+        HGOTO_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL,
+                    "can't inform SAP of file open");
+
+    HDmemset(&status, 0, sizeof(status));
+
+    /* Get the file ID from the SAP (captain rank only) */
+    if (H5FP_my_rank == H5FP_capt_rank) {
+        if ((mrc = MPI_Recv(&file_id, 1, MPI_UNSIGNED, (int)H5FP_sap_rank,
+                            H5FP_TAG_FILE_ID, H5FP_SAP_COMM,
+                            &status)) != MPI_SUCCESS)
+            HMPI_GOTO_ERROR(NULL, "MPI_Recv failed", mrc);
+    }
+
+    /* Broadcast the file ID */
+    if ((mrc = MPI_Bcast(&file_id, 1, MPI_UNSIGNED,
+                         0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc);
+
+    /* The captain rank will get the filesize and broadcast it. */
+    if (H5FP_my_rank == H5FP_capt_rank) {
+        /* Get current file size */
+        if ((mrc = MPI_File_get_size(fh, &size)) != MPI_SUCCESS)
+            HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mrc);
+    }
+
+    /* Broadcast file-size */
+    if ((mrc = MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE,
+                         0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc);
+
+    /* Only if size > 0, truncate the file - if requested */
+    if (size && (flags & H5F_ACC_TRUNC)) {
+        if ((mrc = MPI_File_set_size(fh, (MPI_Offset)0)) != MPI_SUCCESS)
+            HMPI_GOTO_ERROR(NULL, "MPI_File_set_size (file truncation) failed", mrc);
+
+        /* Don't let any proc return until all have truncated the file. */
+        if ((mrc = MPI_Barrier(H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
+            HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mrc);
+
+        size = 0;
+    }
+
+    /*
+     * Build the return value and initialize it. The structure comes back
+     * zero-filled from H5MM_calloc, so members not assigned below (eoa,
+     * last_eoa, mpi_round) start out as 0.
+     */
+    if ((file = H5MM_calloc(sizeof(H5FP_fphdf5_t))) == NULL)
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
+
+    file->f = fh;
+    file->comm = fa->comm;
+    file->info = fa->info;
+    file->mpi_rank = H5FP_my_rank;
+    file->mpi_size = H5FP_comm_size;
+    file->eof = H5FD_fphdf5_MPIOff_to_haddr(size);
+
+    /* Set return value */
+    ret_value = (H5FD_t *)file;
+    ret_value->fphdf5_id = file_id; /* the file descriptor used in FPHDF5 */
+
+done:
+    /* On failure give the MPI file handle back; nothing else is owned yet */
+    if (!ret_value && file_opened)
+        MPI_File_close(&fh);
+
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_close
+ *
+ * Purpose:     Closes a file. This is collective.
+ *
+ * Return:      Success:    Non-negative
+ *
+ *              Failure:    Negative
+ *
+ * Programmer:  Unknown
+ *              January 30, 1998
+ *
+ * Modifications:
+ *              Robb Matzke, 1998-02-18
+ *              Added the ACCESS_PARMS argument.
+ *
+ *              Robb Matzke, 1999-08-06
+ *              Modified to work with the virtual file layer. 
+ *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_close(H5FD_t *_file) +{ + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + int mpi_code; /* mpi return code */ + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_fphdf5_close, FAIL); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + + /* MPI_File_close sets argument to MPI_FILE_NULL */ + if (MPI_SUCCESS != (mpi_code=MPI_File_close(&(file->f)/*in,out*/))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code); + + /* Clean up other stuff */ + H5MM_xfree(file); + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_query + * + * Purpose: Set the flags that this VFL driver is capable of supporting. + * (listed in H5FDpublic.h) + * + * Return: Success: non-negative + * + * Failure: negative + * + * Programmer: Quincey Koziol + * Friday, August 25, 2000 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */) +{ + herr_t ret_value=SUCCEED; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_query, FAIL); + + /* Set the VFL feature flags that this driver supports */ + if(flags) { + *flags=0; + *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ + + /* Distinguish between updating the metadata accumulator on writes and + * reads. This is particularly (perhaps only, even) important for MPI-I/O + * where we guarantee that writes are collective, but reads may not be. + * If we were to allow the metadata accumulator to be written during a + * read operation, the application would hang. 
+ */ + *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */ + + *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_get_eoa + * + * Purpose: Gets the end-of-address marker for the file. The EOA marker + * is the first address past the last byte allocated in the + * format address space. + * + * Return: Success: The end-of-address marker. + * + * Failure: HADDR_UNDEF + * + * Programmer: Robb Matzke + * Friday, August 6, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_fphdf5_get_eoa(H5FD_t *_file) +{ + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + haddr_t ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eoa, HADDR_UNDEF); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + + /* Set return value */ + ret_value=file->eoa; + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_set_eoa + * + * Purpose: Set the end-of-address marker for the file. This function is + * called shortly after an existing HDF5 file is opened in order + * to tell the driver where the end of the HDF5 data is located. 
+ * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Robb Matzke + * Friday, August 6, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr) +{ + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_fphdf5_set_eoa, FAIL); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + + file->eoa = addr; + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_get_eof + * + * Purpose: Gets the end-of-file marker for the file. The EOF marker + * is the real size of the file. + * + * The MPIO driver doesn't bother keeping this field updated + * since that's a relatively expensive operation. Fortunately + * the library only needs the EOF just after the file is opened + * in order to determine whether the file is empty, truncated, + * or okay. Therefore, any MPIO I/O function will set its value + * to HADDR_UNDEF which is the error return value of this + * function. + * + * Return: Success: The end-of-address marker. + * + * Failure: HADDR_UNDEF + * + * Programmer: Robb Matzke + * Friday, August 6, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_fphdf5_get_eof(H5FD_t *_file) +{ + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + haddr_t ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eof, HADDR_UNDEF); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + + /* Set return value */ + ret_value=file->eof; + +done: + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_get_handle + * + * Purpose: Returns the file handle of MPIO file driver. 
+ *
+ * Returns:     Non-negative if succeed or negative if fails.
+ *
+ * Programmer:  Raymond Lu
+ *              Sept. 16, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+*/
+static herr_t
+H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t UNUSED fapl, void** file_handle)
+{
+    H5FP_fphdf5_t   *file = (H5FP_fphdf5_t *)_file;
+    herr_t           ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(H5FD_fphdf5_get_handle, FAIL);
+
+    /*
+     * Same sanity checks as every other callback of this driver; without
+     * them a NULL _FILE would silently produce a bogus handle address.
+     */
+    assert(file);
+    assert(H5FD_FPHDF5 == file->pub.driver_id);
+
+    if(!file_handle)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid");
+
+    /*
+     * What is handed out is the address of the MPI_File member inside the
+     * file structure, not a copy; it stays meaningful only as long as
+     * the file structure itself exists.
+     */
+    *file_handle = &(file->f);
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function:    H5FD_fphdf5_read
+ *
+ * Purpose:     Reads SIZE bytes of data from FILE beginning at address ADDR
+ *              into buffer BUF according to data transfer properties in
+ *              DXPL_ID using potentially complex file and buffer types to
+ *              effect the transfer.
+ *
+ *              Reading past the end of the MPI file returns zeros instead of
+ *              failing. MPI is able to coalesce requests from different
+ *              processes (collective or independent).
+ *
+ * Return:      Success:    Zero. Result is stored in caller-supplied
+ *                          buffer BUF.
+ *
+ *              Failure:    -1, Contents of buffer BUF are undefined.
+ *
+ * Programmer:  rky, 1998-01-30
+ *
+ * Modifications:
+ *              Robb Matzke, 1998-02-18
+ *              Added the ACCESS_PARMS argument.
+ *
+ *              rky, 1998-04-10
+ *              Call independent or collective MPI read, based on
+ *              ACCESS_PARMS.
+ *
+ *              Albert Cheng, 1998-06-01
+ *              Added XFER_MODE to control independent or collective MPI
+ *              read.
+ *
+ *              rky, 1998-08-16
+ *              Use BTYPE, FTYPE, and DISP from access parms. The guts of
+ *              H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a
+ *              single dual-purpose routine.
+ *
+ *              Robb Matzke, 1999-04-21
+ *              Changed XFER_MODE to XFER_PARMS for all H5F_*_read()
+ *              callbacks.
+ *
+ *              Robb Matzke, 1999-07-28
+ *              The ADDR argument is passed by value. 
+ * + * Robb Matzke, 1999-08-06 + * Modified to work with the virtual file layer. + * + * Quincey Koziol, 2002-05-14 + * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type + * for the I/O transfer. Someday we might include code to decode + * the MPI type used for more complicated transfers and call + * MPI_Get_count all the time. + * + * Quincey Koziol - 2002/06/17 + * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use + * the address of the dataset in MPI_File_set_view() calls, as + * necessary. + * + * Quincey Koziol - 2002/06/24 + * Removed "lazy" MPI_File_set_view() calls, since they would fail + * if the first I/O was a collective I/O using MPI derived types + * and the next I/O was an independent I/O. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t addr, size_t size, + void *buf/*out*/) +{ +#if 0 + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + const H5FD_fphdf5_dxpl_t *dx=NULL; + H5FD_fphdf5_dxpl_t _dx; + MPI_Offset mpi_off, mpi_disp; + MPI_Status mpi_stat; + int mpi_code; /* mpi return code */ + MPI_Datatype buf_type, file_type; + int size_i, bytes_read, n; + unsigned use_view_this_time=0; + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value=SUCCEED; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_read, FAIL); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + /* Make certain we have the correct type of property list */ + assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id)); + assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER)); + assert(buf); + + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat,0,sizeof(MPI_Status)); + + /* some numeric conversions */ + if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off/*out*/)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off"); + size_i = (int)size; + if ((hsize_t)size_i != size) + 
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i"); + + /* Obtain the data transfer properties */ + if(NULL == (plist = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list"); + if (H5FD_FPHDF5!=H5P_get_driver(plist)) { + _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/ + dx = &_dx; + } else { + dx = H5P_get_driver_info(plist); + assert(dx); + } + + /* + * Set up for a fancy xfer using complex types, or single byte block. We + * wouldn't need to rely on the use_view field if MPI semantics allowed + * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which + * could mean "use MPI_BYTE" by convention). + */ + if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0) + if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + + if (use_view_this_time) { + /* prepare for a full-blown xfer using btype, ftype, and disp */ + if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the read to zero + */ + mpi_disp=mpi_off; + mpi_off=0; + } /* end if */ + else { + /* + * Prepare for a simple xfer of a contiguous block of bytes. The + * btype, ftype, and disp fields are not used. 
+ */ + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + mpi_disp = 0; /* mpi_off is already set */ + } /* end else */ + + /* + * Set the file view when we are using MPI derived types + */ + if (use_view_this_time) { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + } /* end if */ + + /* Read the data. */ + assert(H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode || H5FD_FPHDF5_COLLECTIVE==dx->xfer_mode); + if (H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode) { + if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code); + } else { + if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat ))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code); + } + + /* KLUDGE, Robb Matzke, 2000-12-29 + * The LAM implementation of MPI_Get_count() says + * MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD) + * So I'm commenting this out until it can be investigated. The + * returned `bytes_written' isn't used anyway because of Kim's + * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_write(). */ + +#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/ + /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11. + * Many systems don't support MPI_Get_count so we need to do a + * configure thingy to fix this. */ + + /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually + * had the 'buf_type' set to MPI_BYTE -QAK + */ + if(use_view_this_time) { + /* Figure out the mapping from the MPI 'buf_type' to bytes, someday... + * If this gets fixed (and MPI_Get_count() is reliable), the + * kludge below where the 'bytes_read' value from MPI_Get_count() is + * overwritten with the 'size_i' parameter can be removed.
-QAK + */ + } /* end if */ + else { + /* How many bytes were actually read? */ + if (MPI_SUCCESS != (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_read))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code); + } /* end else */ +#endif /* H5_HAVE_MPI_GET_COUNT */ + + /* + * KLUGE rky 1998-02-02 + * MPI_Get_count incorrectly returns negative count; fake a complete + * read. + */ + bytes_read = size_i; + + /* Check for read failure */ + if (bytes_read<0 || bytes_read>size_i) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed"); + + /* + * Reset the file view when we used MPI derived types + */ + if (use_view_this_time) { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + } /* end if */ + + /* + * This gives us zeroes beyond end of physical MPI file. What about + * reading past logical end of HDF5 file??? + */ + if ((n=(size_i-bytes_read)) > 0) { + if (use_view_this_time) { + /* + * INCOMPLETE rky 1998-09-18 + * Haven't implemented reading zeros beyond EOF. What to do??? + */ + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed"); + } else { + memset((char*)buf+bytes_read, 0, (size_t)n); + } + } + +done: + FUNC_LEAVE_NOAPI(ret_value); +#else + return SUCCEED; +#endif +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_write + * + * Purpose: Writes SIZE bytes of data to FILE beginning at address ADDR + * from buffer BUF according to data transfer properties in + * DXPL_ID using potentially complex file and buffer types to + * effect the transfer. + * + * MPI is able to coalesce requests from different processes + * (collective and independent). + * + * Return: Success: Zero. USE_TYPES and OLD_USE_TYPES in the + * access params are altered. 
+ * + * Failure: -1, USE_TYPES and OLD_USE_TYPES in the + * access params may be altered. + * + * Programmer: Unknown + * January 30, 1998 + * + * Modifications: + * rky, 1998-08-28 + * If the file->allsame flag is set, we assume that all the + * procs in the relevant MPI communicator will write identical + * data at identical offsets in the file, so only proc 0 will + * write, and all other procs will wait for p0 to finish. This + * is useful for writing metadata, for example. Note that we + * don't _check_ that the data is identical. Also, the mechanism + * we use to eliminate the redundant writes is by requiring a + * call to H5FD_fphdf5_tas_allsame before the write, which is + * rather klugey. Would it be better to pass a parameter to + * low-level writes like H5F_block_write and H5F_low_write, + * instead? Or...??? Also, when I created this mechanism I + * wanted to minimize the difference in behavior between the old + * way of doing things (i.e., all procs write) and the new way, + * so the writes are eliminated at the very lowest level, here + * in H5FD_fphdf5_write. It may be better to rethink that, and + * short-circuit the writes at a higher level (e.g., at the + * points in the code where H5FD_fphdf5_tas_allsame is called). + * + * + * Robb Matzke, 1998-02-18 + * Added the ACCESS_PARMS argument. + * + * rky, 1998-04-10 + * Call independent or collective MPI write, based on + * ACCESS_PARMS. + * + * rky, 1998-04-24 + * Removed redundant write from H5FD_fphdf5_write. + * + * Albert Cheng, 1998-06-01 + * Added XFER_MODE to control independent or collective MPI + * write. + * + * rky, 1998-08-16 + * Use BTYPE, FTYPE, and DISP from access parms. The guts of + * H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a + * single dual-purpose routine. + * + * rky, 1998-08-28 + * Added ALLSAME parameter to make all but proc 0 skip the + * actual write. + * + * Robb Matzke, 1999-04-21 + * Changed XFER_MODE to XFER_PARMS for all H5FD_*_write() + * callbacks. 
+ * + * Robb Matzke, 1999-07-28 + * The ADDR argument is passed by value. + * + * Robb Matzke, 1999-08-06 + * Modified to work with the virtual file layer. + * + * Albert Cheng, 1999-12-19 + * When only-p0-write-allsame-data, p0 Bcasts the + * ret_value to other processes. This prevents + * a racing condition (that other processes try to + * read the file before p0 finishes writing) and also + * allows all processes to report the same ret_value. + * + * Kim Yates, Pat Weidhaas, 2000-09-26 + * Move block of coding where only p0 writes after the + * MPI_File_set_view call. + * + * Quincey Koziol, 2002-05-10 + * Instead of always writing metadata from process 0, spread the + * burden among all the processes by using a round-robin rotation + * scheme. + * + * Quincey Koziol, 2002-05-10 + * Removed allsame code, keying off the type parameter instead. + * + * Quincey Koziol, 2002-05-14 + * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type + * for the I/O transfer. Someday we might include code to decode + * the MPI type used for more complicated transfers and call + * MPI_Get_count all the time. + * + * Quincey Koziol - 2002/06/17 + * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use + * the address of the dataset in MPI_File_set_view() calls, as + * necessary. + * + * Quincey Koziol - 2002/06/24 + * Removed "lazy" MPI_File_set_view() calls, since they would fail + * if the first I/O was a collective I/O using MPI derived types + * and the next I/O was an independent I/O. + * + * Quincey Koziol - 2002/07/18 + * Added "block_before_meta_write" dataset transfer flag, which + * is set during writes from a metadata cache flush and indicates + * that all the processes must sync up before (one of them) + * writing metadata. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, + size_t size, const void *buf) +{ +#if 0 + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + const H5FD_fphdf5_dxpl_t *dx=NULL; + H5FD_fphdf5_dxpl_t _dx; + MPI_Offset mpi_off, mpi_disp; + MPI_Status mpi_stat; + MPI_Datatype buf_type, file_type; + int mpi_code; /* MPI return code */ + int size_i, bytes_written; + unsigned use_view_this_time=0; + unsigned block_before_meta_write=0; /* Whether to block before a metadata write */ + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value=SUCCEED; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_write, FAIL); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + /* Make certain we have the correct type of property list */ + assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id)); + assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER)); + assert(buf); + + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat,0,sizeof(MPI_Status)); + + /* some numeric conversions */ + if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off"); + size_i = (int)size; + if ((hsize_t)size_i != size) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i"); + + /* Obtain the data transfer properties */ + if(NULL == (plist = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list"); + if (H5FD_FPHDF5!=H5P_get_driver(plist)) { + _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/ + dx = &_dx; + } else { + dx = H5P_get_driver_info(plist); + assert(dx); + } + + /* + * Set up for a fancy xfer using complex types, or single byte block. 
We + * wouldn't need to rely on the use_view field if MPI semantics allowed + * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which + * could mean "use MPI_BYTE" by convention). + */ + if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0) + if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + + if (use_view_this_time) { + /* prepare for a full-blown xfer using btype, ftype, and disp */ + if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property"); + + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the write to zero + */ + mpi_disp=mpi_off; + mpi_off=0; + } /* end if */ + else { + /* + * Prepare for a simple xfer of a contiguous block of bytes. + * The btype, ftype, and disp fields are not used. + */ + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + mpi_disp = 0; /* mpi_off is already set */ + } /* end else */ + + /* + * Set the file view when we are using MPI derived types + */ + if (use_view_this_time) { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + } /* end if */ + + /* Metadata specific actions */ + if(type!=H5FD_MEM_DRAW) { + /* Check if we need to synchronize all processes before attempting metadata write + * (Prevents race condition where the process writing the metadata goes ahead + * and writes the metadata to the file before all the processes have + * read the data, "transmitting" data from the "future" to the reading + * process.
-QAK ) + */ + if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0) + if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property"); + + if(block_before_meta_write) + if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); + + /* Only p will do the actual write if all procs in comm write same metadata */ + if (H5_fphdf5_1_metawrite_g) { + if (file->mpi_rank != file->mpi_round) { + HGOTO_DONE(SUCCEED) /* skip the actual write */ + } + } + } /* end if */ + + /* Write the data. */ + assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode); + if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code); + } else { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code); + } + + /* KLUDGE, Robb Matzke, 2000-12-29 + * The LAM implementation of MPI_Get_count() says + * MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD) + * So I'm commenting this out until it can be investigated. The + * returned `bytes_written' isn't used anyway because of Kim's + * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_read(). */ + +#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/ + /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11. + * Many systems don't support MPI_Get_count so we need to do a + * configure thingy to fix this. 
*/ + + /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually + * had the 'buf_type' set to MPI_BYTE -QAK + */ + if(use_view_this_time) { + /* Figure out the mapping from the MPI 'buf_type' to bytes, someday... + * If this gets fixed (and MPI_Get_count() is reliable), the + * kludge below where the 'bytes_written' value from MPI_Get_count() is + * overwritten with the 'size_i' parameter can be removed. -QAK + */ + } /* end if */ + else { + /* How many bytes were actually written? */ + if (MPI_SUCCESS!= (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_written))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code); + } /* end else */ +#endif /* H5_HAVE_MPI_GET_COUNT */ + + /* + * KLUGE rky, 1998-02-02 + * MPI_Get_count incorrectly returns negative count; fake a complete + * write. + */ + bytes_written = size_i; + + /* Check for write failure */ + if (bytes_written<0 || bytes_written>size_i) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed"); + + /* + * Reset the file view when we used MPI derived types + */ + if (use_view_this_time) { + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code); + } /* end if */ + + /* Forget the EOF value (see H5FD_fphdf5_get_eof()) --rpm 1999-08-06 */ + file->eof = HADDR_UNDEF; + +done: + /* Guard against getting into metadata broadcast in failure cases */ + if(ret_value!=FAIL) { + /* if only p writes, need to broadcast the ret_value to other processes */ + if ((type!=H5FD_MEM_DRAW) && H5_fphdf5_1_metawrite_g) { + if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, file->mpi_round, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); + + /* Round-robin rotate to the next process */ + file->mpi_round = (++file->mpi_round)%file->mpi_size; + } /* end if */ + } /* end if */ + +
FUNC_LEAVE_NOAPI(ret_value); +#else + return SUCCEED; +#endif +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_flush + * + * Purpose: Makes sure that all data is on disk. This is collective. + * + * Return: Success: Non-negative + * + * Failure: Negative + * + * Programmer: Unknown + * January 30, 1998 + * + * Modifications: + * Robb Matzke, 1998-02-18 + * Added the ACCESS_PARMS argument. + * + * Robb Matzke, 1999-08-06 + * Modified to work with the virtual file layer. + * + * Robb Matzke, 2000-12-29 + * Make sure file size is at least as large as the last + * allocated byte. + * + * Quincey Koziol, 2002-06-?? + * Changed file extension method to use MPI_File_set_size instead + * read->write method. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing) +{ +#if 0 + H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file; + int mpi_code; /* mpi return code */ + MPI_Offset mpi_off; + herr_t ret_value=SUCCEED; +#ifdef OLD_WAY + uint8_t byte=0; + MPI_Status mpi_stat; +#endif /* OLD_WAY */ + + FUNC_ENTER_NOAPI(H5FD_fphdf5_flush, FAIL); + + assert(file); + assert(H5FD_FPHDF5==file->pub.driver_id); + +#ifdef OLD_WAY + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat,0,sizeof(MPI_Status)); +#endif /* OLD_WAY */ + + /* Extend the file to make sure it's large enough, then sync. 
+ * Unfortunately, keeping track of EOF is an expensive operation, so + * we can't just check whether EOF<EOA like with other drivers. + * Therefore we'll just read the byte at EOA-1 and then write it back. */ + if (file->eoa>file->last_eoa) { +#ifdef OLD_WAY + if (0==file->mpi_rank) { + if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset"); + if (MPI_SUCCESS != (mpi_code=MPI_File_read_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code); + if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code); + } /* end if */ +#else /* OLD_WAY */ + if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa, &mpi_off)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset"); + + /* Extend the file's size */ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code); + + /* Don't let any proc return until all have extended the file.
+ * (Prevents race condition where some processes go ahead and write + * more data to the file before all the processes have finished making + * it the shorter length, potentially truncating the file and dropping + * the new data written) + */ + if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); +#endif /* OLD_WAY */ + + /* Update the 'last' eoa value */ + file->last_eoa=file->eoa; + } /* end if */ + + /* Only sync the file if we are not going to immediately close it */ + if(!closing) { + if (MPI_SUCCESS != (mpi_code=MPI_File_sync(file->f))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code); + } /* end if */ + +done: + FUNC_LEAVE_NOAPI(ret_value); +#else + return SUCCEED; +#endif +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_MPIOff_to_haddr + * Purpose: Convert an MPI_Offset value to haddr_t. + * Return: Success: The haddr_t equivalent of the MPI_OFF argument. + * Failure: HADDR_UNDEF + * Programmer: Bill Wendling + * 30. January 2003 + * Modifications: + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off) +{ + haddr_t ret_value; + + FUNC_ENTER_NOINIT(H5FD_fphdf5_MPIOff_to_haddr); + ret_value = (mpi_off != (MPI_Offset)(haddr_t)mpi_off ? HADDR_UNDEF : (haddr_t)mpi_off); + FUNC_LEAVE_NOAPI(ret_value); +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_haddr_to_MPIOff + * Purpose: Convert an haddr_t value to MPI_Offset. + * Return: Success: Non-negative, the MPI_OFF argument contains + * the converted value. + * Failure: FAIL, MPI_OFF is undefined. + * Programmer: Bill Wendling + * 30. 
January 2003 + * Modifications: + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off /*out*/) +{ + herr_t ret_value = FAIL; + + FUNC_ENTER_NOINIT(H5FD_fphdf5_haddr_to_MPIOff); + + if (mpi_off) + *mpi_off = (MPI_Offset)addr; + + ret_value = (addr != (haddr_t)(MPI_Offset)addr ? FAIL : SUCCEED); + FUNC_LEAVE_NOAPI(ret_value); +} + +#endif /* H5_HAVE_FPHDF5 */ diff --git a/src/H5FDfphdf5.h b/src/H5FDfphdf5.h new file mode 100644 index 0000000..46061ec --- /dev/null +++ b/src/H5FDfphdf5.h @@ -0,0 +1,62 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#ifndef H5FDFPHDF5_H__ +#define H5FDFPHDF5_H__ + +#include "H5FDpublic.h" +#include "H5Ipublic.h" + +#ifdef H5_HAVE_PARALLEL +# define H5FD_FPHDF5 (H5FD_fphdf5_init()) +#else +# define H5FD_FPHDF5 (-1) +#endif /* H5_HAVE_PARALLEL */ + +/* Macros */ + +#define IS_H5FD_FPHDF5(f) (H5F_get_driver_id(f) == H5FD_FPHDF5) + +#ifdef H5_HAVE_PARALLEL + +/* Turn on H5FDfphdf5_debug if H5F_DEBUG is on */ +#ifdef H5F_DEBUG +# ifndef H5FDfphdf5_DEBUG +# define H5FDfphdf5_DEBUG +# endif +#endif + +/* Function prototypes */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +H5_DLL hid_t H5FD_fphdf5_init(void); +H5_DLL herr_t H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Info info); +H5_DLL herr_t H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm/*out*/, + MPI_Info *info/*out*/); +H5_DLL MPI_Comm H5FD_fphdf5_communicator(H5FD_t *_file); +H5_DLL herr_t H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype, + MPI_Datatype ftype, unsigned use_view); +H5_DLL herr_t H5FD_fphdf5_teardown(hid_t dxpl_id); +H5_DLL int H5FD_fphdf5_mpi_rank(H5FD_t *_file); +H5_DLL int H5FD_fphdf5_mpi_size(H5FD_t *_file); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* H5_HAVE_PARALLEL */ + +#endif /* H5FDFPHDF5_H__ */ diff --git a/src/H5FP.c b/src/H5FP.c index 16e210b..c246235 100644 --- a/src/H5FP.c +++ b/src/H5FP.c @@ -266,7 +266,7 @@ H5FP_read_metadata(char **mdata, int len, int from) HDmemset(*mdata, 0, (size_t)len + 1); - if ((mrc = MPI_Recv(*mdata, len, MPI_BYTE, from, H5FP_TAG_METADATA, + if ((mrc = MPI_Recv(*mdata, len + 1, MPI_BYTE, from, H5FP_TAG_METADATA, H5FP_SAP_COMM, &status)) != MPI_SUCCESS) { HDfree(*mdata); *mdata = NULL; diff --git a/src/H5FPclient.c b/src/H5FPclient.c index 0d0b09e..c0e83b7 100644 --- a/src/H5FPclient.c +++ b/src/H5FPclient.c @@ -20,11 +20,12 @@ #include "H5Dprivate.h" /* Dataset Functions */ #include "H5Eprivate.h" /* Error Handling */ #include 
"H5Fprivate.h" /* Files */ -#include "H5Gpkg.h" /* Group functions */ +#include "H5FDprivate.h" /* File Drivers */ +#include "H5Gpkg.h" /* Group Functions */ #include "H5Iprivate.h" /* ID Functions */ -#include "H5MMprivate.h" /* Memory allocation */ +#include "H5MMprivate.h" /* Memory Allocation */ #include "H5Oprivate.h" /* Object Headers */ -#include "H5Spkg.h" /* Dataspace functions */ +#include "H5Spkg.h" /* Dataspace Functions */ #include "H5TBprivate.h" /* Threaded, Balanced, Binary Trees */ #ifdef H5_HAVE_FPHDF5 @@ -40,7 +41,7 @@ static int interface_initialize_g = 0; /* local functions */ static unsigned H5FP_gen_request_id(void); -static herr_t H5FP_dump_to_file(H5F_t *file, H5FP_read *sap_read); +static herr_t H5FP_dump_to_file(H5FD_t *file, H5FP_read *sap_read); /* *===----------------------------------------------------------------------=== @@ -68,7 +69,7 @@ static herr_t H5FP_dump_to_file(H5F_t *file, H5FP_read *sap_read); */ herr_t H5FP_request_open(const char *mdata, int md_size, H5FP_obj_t obj_type, - unsigned *file_id, unsigned *req_id) + haddr_t maxaddr, unsigned *file_id, unsigned *req_id) { H5FP_request req; MPI_Status mpi_status; @@ -94,6 +95,7 @@ H5FP_request_open(const char *mdata, int md_size, H5FP_obj_t obj_type, req.proc_rank = H5FP_my_rank; req.md_size = md_size; req.obj_type = obj_type; + req.addr = maxaddr; if ((mrc = MPI_Send(&req, 1, H5FP_request_t, (int)H5FP_sap_rank, H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) @@ -151,8 +153,10 @@ H5FP_request_lock(unsigned file_id, unsigned char *obj_oid, H5FP_COPY_OID(req.oid, obj_oid); if ((mrc = MPI_Send(&req, 1, H5FP_request_t, (int)H5FP_sap_rank, - H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) + H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) { + *status = H5FP_STATUS_LOCK_FAILED; HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc); + } if (last) { /* @@ -175,9 +179,6 @@ H5FP_request_lock(unsigned file_id, unsigned char *obj_oid, } done: - if (ret_value == FAIL) - *status = 
H5FP_STATUS_LOCK_FAILED; - *req_id = req.req_id; FUNC_LEAVE_NOAPI(ret_value); } @@ -220,8 +221,10 @@ H5FP_request_release_lock(unsigned file_id, unsigned char *obj_oid, req.proc_rank = H5FP_my_rank; if ((mrc = MPI_Send(&req, 1, H5FP_request_t, (int)H5FP_sap_rank, - H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) + H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) { + *status = H5FP_STATUS_LOCK_RELEASE_FAILED; HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc); + } if (last) { /* @@ -240,7 +243,8 @@ H5FP_request_release_lock(unsigned file_id, unsigned char *obj_oid, *status = sap_reply.status; if (sap_reply.status != H5FP_STATUS_LOCK_RELEASED) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTUNLOCK, FAIL, "can't unlock object on server"); + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTUNLOCK, FAIL, + "can't unlock object on server"); } done: @@ -264,10 +268,9 @@ done: * Modifications: */ herr_t -H5FP_request_read_metadata(H5F_t *file, unsigned file_id, H5FP_obj_t obj_type, - H5AC_subid_t type_id, haddr_t addr, size_t size, - uint8_t **buf, unsigned *req_id, - H5FP_status_t *status) +H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, H5FD_mem_t mem_type, + haddr_t addr, size_t size, uint8_t **buf, + unsigned *req_id, H5FP_status_t *status) { H5FP_request req; H5FP_read sap_read; /* metadata info read from the SAP's cache */ @@ -290,8 +293,6 @@ H5FP_request_read_metadata(H5F_t *file, unsigned file_id, H5FP_obj_t obj_type, req.req_id = H5FP_gen_request_id(); req.file_id = file_id; req.proc_rank = H5FP_my_rank; - req.obj_type = obj_type; - req.type_id = type_id; req.addr = addr; if ((mrc = MPI_Send(&req, 1, H5FP_request_t, (int)H5FP_sap_rank, @@ -304,15 +305,14 @@ H5FP_request_read_metadata(H5F_t *file, unsigned file_id, H5FP_obj_t obj_type, H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc); - *status = sap_read.status; + HDmemset(*buf, '\0', size); - switch (*status) { + switch (sap_read.status) { case H5FP_STATUS_OK: /* use the info in the 
H5FP_read_t structure to update the metadata */ - HDmemset(*buf, '\0', size); HDmemset(&mpi_status, 0, sizeof(mpi_status)); - if ((mrc = MPI_Recv(*buf, (int)sap_read.md_size, MPI_BYTE, (int)H5FP_sap_rank, + if ((mrc = MPI_Recv(*buf, (int)size, MPI_BYTE, (int)H5FP_sap_rank, H5FP_TAG_METADATA, H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc); @@ -334,11 +334,15 @@ H5FP_request_read_metadata(H5F_t *file, unsigned file_id, H5FP_obj_t obj_type, * The metadata wasn't in the SAP's cache. Should read from disk * now. */ + H5FD_read(file, mem_type, H5P_DATASET_XFER_DEFAULT, addr, size, buf); break; default: - break; + *status = sap_read.status; + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTCHANGE, FAIL, "can't write metadata to server"); } + *status = H5FP_STATUS_OK; + done: *req_id = req.req_id; FUNC_LEAVE_NOAPI(ret_value); @@ -360,10 +364,10 @@ done: * Modifications: */ herr_t -H5FP_request_write_metadata(H5F_t *file, unsigned file_id, uint8_t *obj_oid, - H5FP_obj_t obj_type, H5AC_subid_t type_id, - haddr_t addr, int mdata_size, const char *mdata, - unsigned *req_id, H5FP_status_t *status) +H5FP_request_write_metadata(H5FD_t *file, unsigned file_id, uint8_t *obj_oid, + H5AC_subid_t type_id, haddr_t addr, int mdata_size, + const char *mdata, unsigned *req_id, + H5FP_status_t *status) { H5FP_reply sap_reply; H5FP_read sap_read; /* metadata info read from the SAP's cache */ @@ -388,7 +392,6 @@ H5FP_request_write_metadata(H5F_t *file, unsigned file_id, uint8_t *obj_oid, req.req_id = H5FP_gen_request_id(); req.proc_rank = H5FP_my_rank; req.file_id = file_id; - req.obj_type = obj_type; req.type_id = type_id; req.addr = addr; req.md_size = mdata_size; @@ -407,9 +410,10 @@ H5FP_request_write_metadata(H5F_t *file, unsigned file_id, uint8_t *obj_oid, H5FP_TAG_REPLY, H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc); - *status = sap_reply.status; - - switch (*status) { + switch (sap_reply.status) { + case 
H5FP_STATUS_OK: + /* Nothing to do... */ + break; case H5FP_STATUS_DUMPING: /* * Collect the metadata updates from the SAP and write them to @@ -433,13 +437,13 @@ H5FP_request_write_metadata(H5F_t *file, unsigned file_id, uint8_t *obj_oid, } break; - case H5FP_STATUS_OK: - /* Nothing to do... */ - break; default: + *status = sap_reply.status; HGOTO_ERROR(H5E_RESOURCE, H5E_CANTCHANGE, FAIL, "can't write metadata to server"); } + *status = H5FP_STATUS_OK; + done: *req_id = req.req_id; FUNC_LEAVE_NOAPI(ret_value); @@ -462,7 +466,7 @@ done: * Modifications: */ herr_t -H5FP_request_close(H5F_t *file, unsigned file_id, unsigned *req_id, +H5FP_request_close(H5FD_t *file, unsigned file_id, unsigned *req_id, H5FP_status_t *status) { H5FP_reply sap_reply; @@ -495,9 +499,10 @@ H5FP_request_close(H5F_t *file, unsigned file_id, unsigned *req_id, H5FP_TAG_REPLY, H5FP_SAP_COMM, &mpi_status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mrc); - *status = sap_reply.status; - - switch (*status) { + switch (sap_reply.status) { + case H5FP_STATUS_OK: + /* Nothing to do... */ + break; case H5FP_STATUS_DUMPING: /* * Collect the metadata updates from the SAP and write them to @@ -521,13 +526,13 @@ H5FP_request_close(H5F_t *file, unsigned file_id, unsigned *req_id, } break; - case H5FP_STATUS_OK: - /* Nothing to do... 
*/ - break; default: + *status = sap_reply.status; HGOTO_ERROR(H5E_RESOURCE, H5E_CANTCHANGE, FAIL, "can't write metadata to server"); } + *status = H5FP_STATUS_OK; + done: *req_id = req.req_id; FUNC_LEAVE_NOAPI(ret_value); @@ -564,7 +569,7 @@ H5FP_gen_request_id() * Modifications: */ static herr_t -H5FP_dump_to_file(H5F_t *file, H5FP_read *sap_read) +H5FP_dump_to_file(H5FD_t *file, H5FP_read *sap_read) { herr_t ret_value = SUCCEED; diff --git a/src/H5FPprivate.h b/src/H5FPprivate.h index dcd79c2..43caed9 100644 --- a/src/H5FPprivate.h +++ b/src/H5FPprivate.h @@ -222,24 +222,23 @@ extern herr_t H5FP_sap_receive_loop(void); /* Use these functions to communicate with the SAP */ extern herr_t H5FP_request_open(const char *mdata, int md_len, H5FP_obj_t obj_type, - unsigned *file_id, unsigned *req_id); + haddr_t maxaddr, unsigned *file_id, unsigned *req_id); extern herr_t H5FP_request_lock(unsigned sap_file_id, unsigned char *mdata, H5FP_lock_t rw_lock, int last, unsigned *req_id, H5FP_status_t *status); extern herr_t H5FP_request_release_lock(unsigned sap_file_id, unsigned char *mdata, int last, unsigned *req_id, H5FP_status_t *status); -extern herr_t H5FP_request_read_metadata(H5F_t *file, unsigned sap_file_id, - H5FP_obj_t obj_type, H5AC_subid_t type_id, - haddr_t addr, size_t size, - uint8_t **buf, unsigned *req_id, - H5FP_status_t *status); -extern herr_t H5FP_request_write_metadata(H5F_t *file, unsigned sap_file_id, - unsigned char *obj_oid, H5FP_obj_t obj_type, +extern herr_t H5FP_request_read_metadata(H5FD_t *file, unsigned sap_file_id, + H5FD_mem_t mem_type, haddr_t addr, + size_t size, uint8_t **buf, + unsigned *req_id, H5FP_status_t *status); +extern herr_t H5FP_request_write_metadata(H5FD_t *file, unsigned sap_file_id, + unsigned char *obj_oid, H5AC_subid_t type_id, haddr_t addr, int mdata_len, const char *mdata, unsigned *req_id, H5FP_status_t *status); -extern herr_t H5FP_request_close(H5F_t *file, unsigned sap_file_id, unsigned *req_id, +extern herr_t 
H5FP_request_close(H5FD_t *file, unsigned sap_file_id, unsigned *req_id, H5FP_status_t *status); #ifdef __cplusplus diff --git a/src/H5FPserver.c b/src/H5FPserver.c index 6f44933..a571e19 100644 --- a/src/H5FPserver.c +++ b/src/H5FPserver.c @@ -62,6 +62,7 @@ typedef struct { } H5FP_mdata_mod; typedef struct { + H5FD_t file; /* file driver structure */ unsigned file_id; /* the file id the SAP keeps per file */ char *filename; /* the filename - of dubious use */ int closing; /* we're closing the file - no more changes */ @@ -99,7 +100,7 @@ static herr_t H5FP_remove_object_lock_from_list(H5FP_file_info *info, H5FP_object_lock *ol); /* local file information handling functions */ -static herr_t H5FP_add_new_file_info_to_list(unsigned file_id, char *filename); +static herr_t H5FP_add_new_file_info_to_list(unsigned file_id, char *filename, haddr_t maxaddr); static int H5FP_file_info_cmp(H5FP_file_info *k1, H5FP_file_info *k2, int cmparg); static H5FP_file_info *H5FP_new_file_info_node(unsigned file_id, char *filename); static H5FP_file_info *H5FP_find_file_info(unsigned file_id); @@ -600,13 +601,13 @@ H5FP_find_file_info(unsigned file_id) /* * Function: H5FP_add_new_file_info_to_list * Purpose: Add a FILE_ID to the list of file IDS. - * Return: SUCCEED if the node was added - * FAIL otherwise + * Return: Success: SUCCEED + * Failure: FAIL * Programmer: Bill Wendling, 02. 
August, 2002 * Modifications: */ static herr_t -H5FP_add_new_file_info_to_list(unsigned file_id, char *filename) +H5FP_add_new_file_info_to_list(unsigned file_id, char *filename, haddr_t maxaddr) { H5FP_file_info *info; herr_t ret_value = FAIL; @@ -620,6 +621,13 @@ H5FP_add_new_file_info_to_list(unsigned file_id, char *filename) "can't insert file structure into tree"); } + /* + * Initialize some of the information needed for metadata + * allocation requests + */ + info->file.maxaddr = maxaddr; + info->file.accum_loc = HADDR_UNDEF; + HDmemset(info->file.fl, 0, sizeof(info->file.fl)); ret_value = SUCCEED; } @@ -804,36 +812,16 @@ H5FP_sap_handle_open_request(H5FP_request req, char *mdata, unsigned UNUSED md_s if (req.obj_type == H5FP_OBJ_FILE) { unsigned new_file_id = H5FP_gen_sap_file_id(); - int i; - if (H5FP_add_new_file_info_to_list(new_file_id, mdata) != SUCCEED) + /* N.B. At this point, req.addr is equiv. to maxaddr in H5FD_open() */ + if (H5FP_add_new_file_info_to_list(new_file_id, mdata, req.addr) != SUCCEED) HGOTO_ERROR(H5E_FPHDF5, H5E_CANTINSERT, FAIL, "can't insert new file structure to list"); - /* broadcast the file id to all processes */ - /* - * FIXME: Isn't there some way to broadcast this result to the - * barrier group? -QAK - */ - /* - * XXX: - * MPI_Bcast doesn't work in this way and I don't know how - * to get it to work for us. From what I gather, all of the - * processes need to execute the same bit of code (the - * MPI_Bcast function) to get the value to be passed to - * everyone. -BW - */ - for (i = 0; i < H5FP_comm_size; ++i) - if ((unsigned)i != H5FP_sap_rank) - if ((mrc = MPI_Send(&new_file_id, 1, MPI_UNSIGNED, i, - H5FP_TAG_FILE_ID, H5FP_SAP_COMM)) != MPI_SUCCESS) - /* - * FIXME: This is terrible...if we can't send to all - * processes, we should clean the file structure from - * the list and tell all of the other processes that - * we couldn't continue...but how to do that?!? 
- */ - HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc); + /* file ID gets broadcast via the captain process */ + if ((mrc = MPI_Send(&new_file_id, 1, MPI_UNSIGNED, (int)H5FP_capt_rank, + H5FP_TAG_FILE_ID, H5FP_SAP_COMM)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mrc); } done: @@ -1248,6 +1236,8 @@ H5FP_sap_handle_write_request(H5FP_request req, char *mdata, unsigned md_size) FUNC_ENTER_NOINIT(H5FP_sap_handle_write_request); if ((info = H5FP_find_file_info(req.file_id)) != NULL) { + H5FP_object_lock *lock; + if (info->num_mods >= H5FP_MDATA_CACHE_HIGHWATER_MARK) { /* * If there are any modifications not written out yet, dump @@ -1270,30 +1260,32 @@ H5FP_sap_handle_write_request(H5FP_request req, char *mdata, unsigned md_size) if (info->closing) { /* we're closing the file - don't accept anymore changes */ exit_state = H5FP_STATUS_FILE_CLOSING; - ret_value = FAIL; - } else { - /* handle the change request */ - H5FP_object_lock *lock = H5FP_find_object_lock(info, req.oid); + HGOTO_DONE(FAIL); + } - if (!lock || lock->owned_rank != req.proc_rank - || lock->rw_lock != H5FP_LOCK_WRITE) { - /* - * There isn't a write lock or we don't own the write - * lock on this OID - */ - exit_state = H5FP_STATUS_NO_LOCK; - ret_value = FAIL; - } else if (H5FP_add_file_mod_to_list(info, req.mem_type, req.type_id, - req.addr, req.proc_rank, md_size, - mdata) != SUCCEED) { - exit_state = H5FP_STATUS_OOM; - ret_value = FAIL; - } + /* handle the change request */ + lock = H5FP_find_object_lock(info, req.oid); + + if (!lock || lock->owned_rank != req.proc_rank + || lock->rw_lock != H5FP_LOCK_WRITE) { + /* + * There isn't a write lock or we don't own the write + * lock on this OID + */ + exit_state = H5FP_STATUS_NO_LOCK; + HGOTO_DONE(FAIL); + } + + if (H5FP_add_file_mod_to_list(info, req.mem_type, req.type_id, + req.addr, req.proc_rank, md_size, + mdata) != SUCCEED) { + exit_state = H5FP_STATUS_OOM; + HGOTO_DONE(FAIL); } } else { /* error: there isn't a file opened to change */ 
exit_state = H5FP_STATUS_BAD_FILE_ID; - ret_value = FAIL; + HGOTO_DONE(FAIL); } done: -- cgit v0.12