diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2004-01-31 01:38:44 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2004-01-31 01:38:44 (GMT) |
commit | 138bc92ebdb7c6e1ad379dcdabae21bf0a79ab0d (patch) | |
tree | 046bd488f60127ac3a6ba0edbd482b44f022c788 /src | |
parent | f499912c3247e592a0eeef7207b917428756b094 (diff) | |
download | hdf5-138bc92ebdb7c6e1ad379dcdabae21bf0a79ab0d.zip hdf5-138bc92ebdb7c6e1ad379dcdabae21bf0a79ab0d.tar.gz hdf5-138bc92ebdb7c6e1ad379dcdabae21bf0a79ab0d.tar.bz2 |
[svn-r8126] Purpose:
Bug fix/optimization
Description:
Address slowdown in MPI-I/O file metadata operations that was introduced
mid-stream. We now _require_ a POSIX compliant parallel file system for the
MPI-I/O file driver (as well as for the MPI-POSIX file driver).
Also optimized file open operation when the file is being created by
reducing the number of collective & synchronizing calls.
Additionally, refactor the MPI routines into a common place, eliminating
duplicated code.
Platforms tested:
FreeBSD 4.9 (sleipnir) w/parallel
h5committest
Diffstat (limited to 'src')
-rw-r--r-- | src/H5Dcontig.c | 32 | ||||
-rw-r--r-- | src/H5Distore.c | 34 | ||||
-rw-r--r-- | src/H5F.c | 30 | ||||
-rw-r--r-- | src/H5FD.c | 76 | ||||
-rw-r--r-- | src/H5FDcore.c | 2 | ||||
-rw-r--r-- | src/H5FDfamily.c | 2 | ||||
-rw-r--r-- | src/H5FDfphdf5.c | 346 | ||||
-rw-r--r-- | src/H5FDfphdf5.h | 12 | ||||
-rw-r--r-- | src/H5FDgass.c | 2 | ||||
-rw-r--r-- | src/H5FDlog.c | 2 | ||||
-rw-r--r-- | src/H5FDmpi.c | 538 | ||||
-rw-r--r-- | src/H5FDmpi.h | 93 | ||||
-rw-r--r-- | src/H5FDmpio.c | 742 | ||||
-rw-r--r-- | src/H5FDmpio.h | 17 | ||||
-rw-r--r-- | src/H5FDmpiposix.c | 249 | ||||
-rw-r--r-- | src/H5FDmpiposix.h | 7 | ||||
-rw-r--r-- | src/H5FDprivate.h | 11 | ||||
-rw-r--r-- | src/H5FDsec2.c | 2 | ||||
-rw-r--r-- | src/H5FDsrb.c | 2 | ||||
-rw-r--r-- | src/H5FDstream.c | 2 | ||||
-rw-r--r-- | src/H5FPclient.c | 12 | ||||
-rw-r--r-- | src/H5FPserver.c | 11 | ||||
-rw-r--r-- | src/H5Fcontig.c | 32 | ||||
-rw-r--r-- | src/H5Fistore.c | 34 | ||||
-rw-r--r-- | src/H5Fpkg.h | 8 | ||||
-rw-r--r-- | src/H5Smpio.c | 4 | ||||
-rw-r--r-- | src/Makefile.in | 4 | ||||
-rw-r--r-- | src/hdf5.h | 4 |
28 files changed, 1130 insertions, 1180 deletions
diff --git a/src/H5Dcontig.c b/src/H5Dcontig.c index 32f1759..4e19a23 100644 --- a/src/H5Dcontig.c +++ b/src/H5Dcontig.c @@ -152,44 +152,18 @@ H5F_contig_fill(H5F_t *f, hid_t dxpl_id, struct H5O_layout_t *layout, #ifdef H5_HAVE_PARALLEL /* Retrieve MPI parameters */ - if(IS_H5FD_MPIO(f)) { + if(IS_H5FD_MPI(f)) { /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpio_communicator(f->shared->lf))) + if (MPI_COMM_NULL == (mpi_comm=H5FD_mpi_get_comm(f->shared->lf))) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + if ((mpi_rank=H5FD_mpi_get_rank(f->shared->lf))<0) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); /* Set the MPI-capable file driver flag */ using_mpi=1; } /* end if */ - else if(IS_H5FD_MPIPOSIX(f)) { - /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpiposix_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi=1; - } /* end if */ -#ifdef H5_HAVE_FPHDF5 - else if (IS_H5FD_FPHDF5(f)) { - /* Get the FPHDF5 barrier communicator */ - if (MPI_COMM_NULL == (mpi_comm = H5FD_fphdf5_barrier_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank = H5FD_fphdf5_mpi_rank(f->shared->lf)) < 0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi = 1; - } /* end if */ -#endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ /* Get the number of elements in the dataset's dataspace */ diff --git a/src/H5Distore.c b/src/H5Distore.c index 
90a9c11..b726c68 100644 --- a/src/H5Distore.c +++ b/src/H5Distore.c @@ -2195,45 +2195,19 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get filter callback struct"); #ifdef H5_HAVE_PARALLEL - /* Retrieve up MPI parameters */ - if(IS_H5FD_MPIO(f)) { + /* Retrieve MPI parameters */ + if(IS_H5FD_MPI(f)) { /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpio_communicator(f->shared->lf))) + if (MPI_COMM_NULL == (mpi_comm=H5FD_mpi_get_comm(f->shared->lf))) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + if ((mpi_rank=H5FD_mpi_get_rank(f->shared->lf))<0) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); /* Set the MPI-capable file driver flag */ using_mpi=1; } /* end if */ - else if(IS_H5FD_MPIPOSIX(f)) { - /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpiposix_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi=1; - } /* end else */ -#ifdef H5_HAVE_FPHDF5 - else if (IS_H5FD_FPHDF5(f)) { - /* Get the FPHDF5 barrier communicator */ - if (MPI_COMM_NULL == (mpi_comm = H5FD_fphdf5_barrier_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank = H5FD_fphdf5_mpi_rank(f->shared->lf)) < 0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi = 1; - } /* end if */ -#endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ /* @@ -35,18 +35,16 @@ #include "H5Tprivate.h" /* 
Datatypes */ /* Predefined file drivers */ -#include "H5FDcore.h" /*temporary in-memory files */ -#include "H5FDfamily.h" /*family of files */ -#include "H5FDfphdf5.h" /*FPHDF5 */ -#include "H5FDgass.h" /*GASS I/O */ +#include "H5FDcore.h" /*temporary in-memory files */ +#include "H5FDfamily.h" /*family of files */ +#include "H5FDgass.h" /*GASS I/O */ #include "H5FDlog.h" /* sec2 driver with logging, for debugging */ -#include "H5FDmpio.h" /*MPI-2 I/O */ -#include "H5FDmpiposix.h" /*MPI-2 & posix I/O */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5FDmulti.h" /*multiple files partitioned by mem usage */ -#include "H5FDsec2.h" /*Posix unbuffered I/O */ -#include "H5FDsrb.h" /*SRB I/O */ -#include "H5FDstdio.h" /* Standard C buffered I/O */ -#include "H5FDstream.h" /*in-memory files streamed via sockets */ +#include "H5FDsec2.h" /*Posix unbuffered I/O */ +#include "H5FDsrb.h" /*SRB I/O */ +#include "H5FDstdio.h" /* Standard C buffered I/O */ +#include "H5FDstream.h" /*in-memory files streamed via sockets */ /* Interface initialization */ static int interface_initialize_g = 0; @@ -229,18 +227,6 @@ H5F_init_interface(void) FUNC_ENTER_NOAPI_NOINIT(H5F_init_interface) -#ifdef OLD_METADATA_WRITE -#ifdef H5_HAVE_PARALLEL - { - /* Allow MPI buf-and-file-type optimizations? */ - const char *s = HDgetenv ("HDF5_MPI_1_METAWRITE"); - if (s && HDisdigit(*s)) { - H5_mpiposix_1_metawrite_g = H5_mpi_1_metawrite_g = (int)HDstrtol (s, NULL, 0); - } - } -#endif /* H5_HAVE_PARALLEL */ -#endif /* OLD_METADATA_WRITE */ - /* * Initialize the atom group for the file IDs. 
There are two groups: * the H5I_FILE group contains all the ID's for files which are currently @@ -213,6 +213,9 @@ H5FD_free_cls(H5FD_class_t *cls) * Monday, July 26, 1999 * * Modifications: + * Copied guts of function info H5FD_register + * Quincey Koziol + * Friday, January 30, 2004 * *------------------------------------------------------------------------- */ @@ -220,7 +223,6 @@ hid_t H5FDregister(const H5FD_class_t *cls) { hid_t ret_value; - H5FD_class_t *saved=NULL; H5FD_mem_t type; FUNC_ENTER_API(H5FDregister, FAIL) @@ -229,27 +231,77 @@ H5FDregister(const H5FD_class_t *cls) /* Check arguments */ if (!cls) HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, "null class pointer is disallowed") - if (!cls->open || !cls->close) HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, "`open' and/or `close' methods are not defined") - if (!cls->get_eoa || !cls->set_eoa) HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, "`get_eoa' and/or `set_eoa' methods are not defined") - if (!cls->get_eof) HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, "`get_eof' method is not defined") if (!cls->read || !cls->write) HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, "`read' and/or `write' method is not defined") - for (type=H5FD_MEM_DEFAULT; type<H5FD_MEM_NTYPES; H5_INC_ENUM(H5FD_mem_t,type)) { - if (cls->fl_map[type]<H5FD_MEM_NOLIST || - cls->fl_map[type]>=H5FD_MEM_NTYPES) + for (type=H5FD_MEM_DEFAULT; type<H5FD_MEM_NTYPES; H5_INC_ENUM(H5FD_mem_t,type)) + if (cls->fl_map[type]<H5FD_MEM_NOLIST || cls->fl_map[type]>=H5FD_MEM_NTYPES) HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid free-list mapping") - } + + /* Create the new class ID */ + if ((ret_value=H5FD_register(cls, sizeof(H5FD_class_t)))<0) + HGOTO_ERROR(H5E_ATOM, H5E_CANTREGISTER, FAIL, "unable to register file driver ID") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5FDregister() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_register + * + * Purpose: Registers a new file 
driver as a member of the virtual file + * driver class. Certain fields of the class struct are + * required and that is checked here so it doesn't have to be + * checked every time the field is accessed. + * + * Return: Success: A file driver ID which is good until the + * library is closed or the driver is + * unregistered. + * + * Failure: A negative value. + * + * Programmer: Robb Matzke + * Monday, July 26, 1999 + * + * Modifications: + * Broke into public and internal routines & added 'size' + * parameter to internal routine, which allows us to create + * sub-classes of H5FD_class_t for internal support (see the + * MPI drivers, etc.) + * Quincey Koziol + * January 30, 2004 + * + *------------------------------------------------------------------------- + */ +hid_t +H5FD_register(const void *_cls, size_t size) +{ + hid_t ret_value; + const H5FD_class_t *cls=(const H5FD_class_t *)_cls; + H5FD_class_t *saved=NULL; + H5FD_mem_t type; + + FUNC_ENTER_NOAPI(H5FD_register, FAIL) + + /* Check arguments */ + assert(cls); + assert(cls->open && cls->close); + assert(cls->get_eoa && cls->set_eoa); + assert(cls->get_eof); + assert(cls->read && cls->write); + for (type=H5FD_MEM_DEFAULT; type<H5FD_MEM_NTYPES; H5_INC_ENUM(H5FD_mem_t,type)) + assert(cls->fl_map[type]>=H5FD_MEM_NOLIST && cls->fl_map[type]<H5FD_MEM_NTYPES); /* Copy the class structure so the caller can reuse or free it */ - if (NULL==(saved=H5MM_malloc(sizeof(H5FD_class_t)))) + if (NULL==(saved=H5MM_malloc(size))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for file driver class struct") - *saved = *cls; + HDmemcpy(saved,cls,size); /* Create the new class ID */ if ((ret_value=H5I_register(H5I_VFL, saved))<0) @@ -260,8 +312,8 @@ done: if(saved) H5MM_xfree(saved); - FUNC_LEAVE_API(ret_value) -} + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_register() */ /*------------------------------------------------------------------------- diff --git a/src/H5FDcore.c b/src/H5FDcore.c index 
5db5852..f803ffe 100644 --- a/src/H5FDcore.c +++ b/src/H5FDcore.c @@ -168,7 +168,7 @@ H5FD_core_init(void) FUNC_ENTER_NOAPI(H5FD_core_init, FAIL) if (H5I_VFL!=H5Iget_type(H5FD_CORE_g)) - H5FD_CORE_g = H5FDregister(&H5FD_core_g); + H5FD_CORE_g = H5FD_register(&H5FD_core_g,sizeof(H5FD_class_t)); /* Set return value */ ret_value=H5FD_CORE_g; diff --git a/src/H5FDfamily.c b/src/H5FDfamily.c index a7042fd..6b198ea 100644 --- a/src/H5FDfamily.c +++ b/src/H5FDfamily.c @@ -162,7 +162,7 @@ H5FD_family_init(void) FUNC_ENTER_NOAPI(H5FD_family_init, FAIL) if (H5I_VFL!=H5Iget_type(H5FD_FAMILY_g)) - H5FD_FAMILY_g = H5FDregister(&H5FD_family_g); + H5FD_FAMILY_g = H5FD_register(&H5FD_family_g,sizeof(H5FD_class_t)); /* Set return value */ ret_value=H5FD_FAMILY_g; diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c index e2aae8a..04b9824 100644 --- a/src/H5FDfphdf5.c +++ b/src/H5FDfphdf5.c @@ -22,8 +22,7 @@ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* Files access */ #include "H5FDprivate.h" /* File drivers */ -#include "H5FDfphdf5.h" /* Flexible PHDF5 file driver */ -#include "H5FDmpio.h" /* MPI I/O file driver */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5Pprivate.h" /* Property lists */ @@ -41,10 +40,8 @@ static hid_t H5FD_FPHDF5_g = 0; /* - * Prototypes + * Private Prototypes */ -static haddr_t H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off); -static herr_t H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off); /* * Callbacks @@ -64,6 +61,9 @@ static herr_t H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_ static herr_t H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf); static herr_t H5FD_fphdf5_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing); +static int H5FD_fphdf5_mpi_rank(const H5FD_t *_file); +static int H5FD_fphdf5_mpi_size(const H5FD_t *_file); +static 
MPI_Comm H5FD_fphdf5_barrier_communicator(const H5FD_t *_file); /* * FPHDF5-specific file access properties @@ -79,7 +79,8 @@ typedef struct H5FD_fphdf5_fapl_t { /* * The FPHDF5 file driver information */ -const H5FD_class_t H5FD_fphdf5_g = { +const H5FD_class_mpi_t H5FD_fphdf5_g = { + { /* Start of superclass information */ "fphdf5", /*name */ HADDR_MAX, /*maxaddr */ H5F_CLOSE_SEMI, /*fc_degree */ @@ -109,30 +110,16 @@ const H5FD_class_t H5FD_fphdf5_g = { NULL, /*lock */ NULL, /*unlock */ H5FD_FLMAP_SINGLE /*fl_map */ + }, /* End of superclass information */ + H5FD_fphdf5_mpi_rank, /*get_rank */ + H5FD_fphdf5_mpi_size, /*get_size */ + H5FD_fphdf5_barrier_communicator /*get_comm */ }; /* Interface initialization */ #define INTERFACE_INIT H5FD_fphdf5_init static int interface_initialize_g = 0; -/* - * The view is set to this value - */ -static char H5FD_mpio_native[] = "native"; - -/* ======== Temporary, Local data transfer properties ======== */ -/* - * Definitions for memory MPI type property - */ -#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME "H5FD_fphdf5_mem_mpi_type" -#define H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype) - -/* - * Definitions for file MPI type property - */ -#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME "H5FD_fphdf5_file_mpi_type" -#define H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE sizeof(MPI_Datatype) - /*------------------------------------------------------------------------- * Function: H5FD_fphdf5_init @@ -153,7 +140,7 @@ H5FD_fphdf5_init(void) FUNC_ENTER_NOAPI(H5FD_fphdf5_init, FAIL) if (H5Iget_type(H5FD_FPHDF5_g) != H5I_VFL) - H5FD_FPHDF5_g = H5FDregister(&H5FD_fphdf5_g); + H5FD_FPHDF5_g = H5FD_register((const H5FD_class_t *)&H5FD_fphdf5_g,sizeof(H5FD_class_mpi_t)); /* Set return value */ ret_value = H5FD_FPHDF5_g; @@ -288,127 +275,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_communicator - * Purpose: Returns the MPI communicator for the file. 
- * Return: Success: The communicator - * Failure: NULL - * Programmer: Bill Wendling - * 30. January 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -MPI_Comm -H5FD_fphdf5_communicator(H5FD_t *_file) -{ - H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; - MPI_Comm ret_value; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_communicator, MPI_COMM_NULL) - - /* check args */ - assert(file); - assert(file->pub.driver_id == H5FD_FPHDF5); - - /* Set return value */ - ret_value = file->comm; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - -/*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_barrier_communicator - * Purpose: Returns the MPI communicator for the file that can be - * used in an MPI_Barrier() statement for the client - * processes. - * Return: Success: The barrier communicator - * Failure: NULL - * Programmer: Bill Wendling - * 10. February 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -MPI_Comm -H5FD_fphdf5_barrier_communicator(H5FD_t *_file) -{ - H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; - MPI_Comm ret_value; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_communicator, MPI_COMM_NULL) - - /* check args */ - assert(file); - assert(file->pub.driver_id == H5FD_FPHDF5); - - /* Set return value */ - ret_value = file->barrier_comm; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_mpi_rank - * Purpose: Returns the MPI rank for a process - * Return: Success: MPI rank - * Failure: Doesn't fail - * Programmer: Bill Wendling - * 30. 
January 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -int -H5FD_fphdf5_mpi_rank(H5FD_t *_file) -{ - H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; - int ret_value; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_rank, FAIL) - - /* check args */ - assert(file); - assert(file->pub.driver_id == H5FD_FPHDF5); - - /* Set return value */ - ret_value = file->mpi_rank; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_mpi_size - * Purpose: Returns the number of MPI processes - * Return: Success: Number of MPI processes - * Failure: Doesn't fail - * Programmer: Bill Wendling - * 30. January 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -int -H5FD_fphdf5_mpi_size(H5FD_t *_file) -{ - H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; - int ret_value; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_size, FAIL) - - /* check args */ - assert(file); - assert(file->pub.driver_id == H5FD_FPHDF5); - - /* Set return value */ - ret_value = file->mpi_size; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- * Function: H5FD_fphdf5_file_id * Purpose: Returns the file ID for the file. * Return: Success: File ID @@ -528,82 +394,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_setup - * Purpose: Set the buffer type BTYPE, file type FTYPE for a data - * transfer. Also request an MPI type transfer. - * Return: Success: SUCCEED - * Failure: FAIL - * Programmer: Bill Wendling - * 30. 
January 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -herr_t -H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype, - MPI_Datatype ftype) -{ - H5P_genplist_t *plist; - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_setup, FAIL) - - /* Check arguments */ - if ((plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)) == NULL) - HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") - - /* Set buffer MPI type */ - if (H5P_insert(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, - H5FD_FPHDF5_XFER_MEM_MPI_TYPE_SIZE, &btype, - NULL, NULL, NULL, NULL, NULL, NULL) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") - - /* Set file MPI type */ - if (H5P_insert(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, - H5FD_FPHDF5_XFER_FILE_MPI_TYPE_SIZE, &ftype, - NULL, NULL, NULL, NULL, NULL, NULL) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_teardown - * Purpose: Remove the temporary MPI-I/O properties from dxpl. - * Return: Success: SUCCEED - * Failure: FAIL - * Programmer: Bill Wendling - * 30. 
January 2003 - * Modifications: - *------------------------------------------------------------------------- - */ -herr_t -H5FD_fphdf5_teardown(hid_t dxpl_id) -{ - H5P_genplist_t *plist; - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI(H5FD_fphdf5_teardown, FAIL) - - /* Check arguments */ - if ((plist = H5P_object_verify(dxpl_id, H5P_DATASET_XFER)) == NULL) - HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") - - /* Remove buffer MPI type */ - if (H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") - - /* Remove file MPI type */ - if (H5P_remove(dxpl_id, plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- * Function: H5Pset_dxpl_fphdf5 * Purpose: Set the data transfer property list DXPL_ID to use * transfer mode XFER_MODE. 
The property list can then be @@ -876,7 +666,7 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd file->info = fa->info; file->mpi_rank = mpi_rank; file->mpi_size = mpi_size; - file->eof = H5FD_fphdf5_MPIOff_to_haddr(size); + file->eof = H5FD_mpi_MPIOff_to_haddr(size); /* Set return value */ ret_value = (H5FD_t *)file; @@ -1182,7 +972,7 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, HDmemset(&status, 0, sizeof(MPI_Status)); /* Some numeric conversions */ - if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off) < 0) + if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr_t to MPI offset") size_i = (int)size; @@ -1218,15 +1008,15 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, use_view_this_time = TRUE; /* Prepare for a full-blown xfer using btype, ftype, and disp */ - if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) + if (H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) + if (H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") /* Set the file view when we are using MPI derived types */ if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, - file_type, H5FD_mpio_native, + file_type, H5FD_mpi_native_g, file->info)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) @@ -1245,7 +1035,7 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, * Reset the file view when we used MPI derived types */ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, - H5FD_mpio_native, file->info)) != MPI_SUCCESS) + H5FD_mpi_native_g, file->info)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) } 
else { /* @@ -1456,7 +1246,7 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist HDmemset(&status, 0, sizeof(MPI_Status)); /* some numeric conversions */ - if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off) < 0) + if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") /* Only check for fancy transfers with raw data I/O */ @@ -1479,15 +1269,15 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist use_view_this_time = TRUE; /* Prepare for a full-blown xfer using btype, ftype, and disp */ - if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) + if (H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) + if (H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") /* Set the file view when we are using MPI derived types */ if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, - file_type, H5FD_mpio_native, + file_type, H5FD_mpi_native_g, file->info)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) @@ -1508,7 +1298,7 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist /* Reset the file view when we used MPI derived types */ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, - H5FD_mpio_native, file->info)) != MPI_SUCCESS) + H5FD_mpi_native_g, file->info)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) } /* end if */ else { @@ -1580,7 +1370,7 @@ H5FD_fphdf5_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing) * back. 
*/ if (file->eoa > file->last_eoa) { - if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa, &mpi_off) < 0) + if (H5FD_mpi_haddr_to_MPIOff(file->eoa, &mpi_off) < 0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset") /* Extend the file's size */ @@ -1619,48 +1409,92 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_MPIOff_to_haddr - * Purpose: Convert an MPI_Offset value to haddr_t. - * Return: Success: The haddr_t equivalent of the MPI_OFF argument. - * Failure: HADDR_UNDEF + * Function: H5FD_fphdf5_mpi_rank + * Purpose: Returns the MPI rank for a process + * Return: Success: MPI rank + * Failure: Doesn't fail * Programmer: Bill Wendling * 30. January 2003 * Modifications: - *------------------------------------------------------------------------- + *------------------------------------------------------------------------- */ -static haddr_t -H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off) +static int +H5FD_fphdf5_mpi_rank(const H5FD_t *_file) { - haddr_t ret_value; + const H5FD_fphdf5_t *file = (const H5FD_fphdf5_t*)_file; + int ret_value; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_rank, FAIL) + + /* check args */ + assert(file); + assert(file->pub.driver_id == H5FD_FPHDF5); - FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_fphdf5_MPIOff_to_haddr) - ret_value = (mpi_off != (MPI_Offset)(haddr_t)mpi_off ? HADDR_UNDEF : (haddr_t)mpi_off); + /* Set return value */ + ret_value = file->mpi_rank; + +done: FUNC_LEAVE_NOAPI(ret_value) } /*------------------------------------------------------------------------- - * Function: H5FD_fphdf5_haddr_to_MPIOff - * Purpose: Convert an haddr_t value to MPI_Offset. - * Return: Success: Non-negative, the MPI_OFF argument contains - * the converted value. - * Failure: FAIL, MPI_OFF is undefined. 
+ * Function: H5FD_fphdf5_mpi_size + * Purpose: Returns the number of MPI processes + * Return: Success: Number of MPI processes + * Failure: Doesn't fail * Programmer: Bill Wendling * 30. January 2003 * Modifications: *------------------------------------------------------------------------- */ -static herr_t -H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off) +static int +H5FD_fphdf5_mpi_size(const H5FD_t *_file) { - herr_t ret_value = FAIL; + const H5FD_fphdf5_t *file = (const H5FD_fphdf5_t*)_file; + int ret_value; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_mpi_size, FAIL) - FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_fphdf5_haddr_to_MPIOff) + /* check args */ + assert(file); + assert(file->pub.driver_id == H5FD_FPHDF5); - if (mpi_off) - *mpi_off = (MPI_Offset)addr; + /* Set return value */ + ret_value = file->mpi_size; - ret_value = (addr != (haddr_t)(MPI_Offset)addr ? FAIL : SUCCEED); +done: + FUNC_LEAVE_NOAPI(ret_value) +} + +/*------------------------------------------------------------------------- + * Function: H5FD_fphdf5_barrier_communicator + * Purpose: Returns the MPI communicator for the file that can be + * used in an MPI_Barrier() statement for the client + * processes. + * Return: Success: The barrier communicator + * Failure: NULL + * Programmer: Bill Wendling + * 10. 
February 2003 + * Modifications: + *------------------------------------------------------------------------- + */ +static MPI_Comm +H5FD_fphdf5_barrier_communicator(const H5FD_t *_file) +{ + const H5FD_fphdf5_t *file = (const H5FD_fphdf5_t*)_file; + MPI_Comm ret_value; + + FUNC_ENTER_NOAPI(H5FD_fphdf5_barrier_communicator, MPI_COMM_NULL) + + /* check args */ + assert(file); + assert(file->pub.driver_id == H5FD_FPHDF5); + + /* Set return value */ + ret_value = file->barrier_comm; + +done: FUNC_LEAVE_NOAPI(ret_value) } diff --git a/src/H5FDfphdf5.h b/src/H5FDfphdf5.h index d77af3c..1e3e0cf 100644 --- a/src/H5FDfphdf5.h +++ b/src/H5FDfphdf5.h @@ -15,9 +15,6 @@ #ifndef H5FDFPHDF5_H__ #define H5FDFPHDF5_H__ -#include "H5FDmpio.h" -#include "H5FDpublic.h" /* for the H5FD_t structure */ - #ifdef H5_HAVE_FPHDF5 # define H5FD_FPHDF5 (H5FD_fphdf5_init()) #else @@ -75,7 +72,7 @@ typedef struct H5FD_fphdf5_t { haddr_t last_eoa; /*Last known end-of-address marker */ } H5FD_fphdf5_t; -extern const H5FD_class_t H5FD_fphdf5_g; +extern const H5FD_class_mpi_t H5FD_fphdf5_g; /* Function prototypes */ #ifdef __cplusplus @@ -105,14 +102,7 @@ H5_DLL herr_t H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm, struct H5P_genplist_t; H5_DLL hid_t H5FD_fphdf5_init(void); -H5_DLL MPI_Comm H5FD_fphdf5_communicator(H5FD_t *_file); -H5_DLL MPI_Comm H5FD_fphdf5_barrier_communicator(H5FD_t *_file); -H5_DLL herr_t H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype, - MPI_Datatype ftype); -H5_DLL herr_t H5FD_fphdf5_teardown(hid_t dxpl_id); H5_DLL unsigned H5FD_fphdf5_file_id(H5FD_t *_file); -H5_DLL int H5FD_fphdf5_mpi_rank(H5FD_t *_file); -H5_DLL int H5FD_fphdf5_mpi_size(H5FD_t *_file); H5_DLL hbool_t H5FD_fphdf5_is_sap(H5FD_t *_file); H5_DLL hbool_t H5FD_fphdf5_is_captain(H5FD_t *_file); H5_DLL hbool_t H5FD_is_fphdf5_driver(H5FD_t *_file); diff --git a/src/H5FDgass.c b/src/H5FDgass.c index 0f888b8..42440ab 100644 --- a/src/H5FDgass.c +++ b/src/H5FDgass.c @@ -204,7 +204,7 @@ H5FD_gass_init(void) 
FUNC_ENTER_NOAPI(H5FD_gass_init, FAIL) if (!H5FD_GASS_g) - H5FD_GASS_g = H5FDregister(&H5FD_gass_g); + H5FD_GASS_g = H5FD_register(&H5FD_gass_g,sizeof(H5FD_class_t)); globus_module_activate (GLOBUS_COMMON_MODULE); globus_module_activate (GLOBUS_GASS_FILE_MODULE); diff --git a/src/H5FDlog.c b/src/H5FDlog.c index 8e7f3f9..e1b84f2 100644 --- a/src/H5FDlog.c +++ b/src/H5FDlog.c @@ -266,7 +266,7 @@ H5FD_log_init(void) FUNC_ENTER_NOAPI(H5FD_log_init, FAIL) if (H5I_VFL!=H5Iget_type(H5FD_LOG_g)) - H5FD_LOG_g = H5FDregister(&H5FD_log_g); + H5FD_LOG_g = H5FD_register(&H5FD_log_g,sizeof(H5FD_class_t)); /* Set return value */ ret_value=H5FD_LOG_g; diff --git a/src/H5FDmpi.c b/src/H5FDmpi.c new file mode 100644 index 0000000..380701d --- /dev/null +++ b/src/H5FDmpi.c @@ -0,0 +1,538 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Quincey Koziol <koziol@ncsa.uiuc.edu> + * Friday, January 30, 2004 + * + * Purpose: Common routines for all MPI-based VFL drivers. 
+ * + */ + +/* Pablo information */ +/* (Put before include files to avoid problems with inline functions) */ +#define PABLO_MASK H5FD_mpi_mask + +#include "H5private.h" /* Generic Functions */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fprivate.h" /* File access */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5FDmpi.h" /* Common MPI file driver */ +#include "H5Pprivate.h" /* Property lists */ + +/* + * The view is set to this value + */ +char H5FD_mpi_native_g[] = "native"; + +#ifdef H5_HAVE_PARALLEL + +/* Interface initialization */ +#define INTERFACE_INIT NULL +static int interface_initialize_g = 0; + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_get_rank + * + * Purpose: Retrieves the rank of an MPI process. + * + * Return: Success: The rank (non-negative) + * + * Failure: Negative + * + * Programmer: Quincey Koziol + * Friday, January 30, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +H5FD_mpi_get_rank(const H5FD_t *file) +{ + const H5FD_class_mpi_t *cls=(const H5FD_class_mpi_t *)(file->cls); + int ret_value; + + FUNC_ENTER_NOAPI(H5FD_mpi_get_rank, FAIL) + + assert(file && cls); + assert(cls->get_rank); /* All MPI drivers must implement this */ + + /* Dispatch to driver */ + if ((ret_value=(cls->get_rank)(file))<0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_rank request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_get_rank() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_get_size + * + * Purpose: Retrieves the size of the communicator used for the file + * + * Return: Success: The communicator size (non-negative) + * + * Failure: Negative + * + * Programmer: Quincey Koziol + * Friday, January 30, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int 
+H5FD_mpi_get_size(const H5FD_t *file) +{ + const H5FD_class_mpi_t *cls=(const H5FD_class_mpi_t *)(file->cls); + int ret_value; + + FUNC_ENTER_NOAPI(H5FD_mpi_get_size, FAIL) + + assert(file && cls); + assert(cls->get_size); /* All MPI drivers must implement this */ + + /* Dispatch to driver */ + if ((ret_value=(cls->get_size)(file))<0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_size request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_get_size() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_get_comm + * + * Purpose: Retrieves the file's communicator by dispatching to the + * driver's get_comm method. + * + * Return: Success: The communicator + * + * Failure: MPI_COMM_NULL + * + * Programmer: Quincey Koziol + * Friday, January 30, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +MPI_Comm +H5FD_mpi_get_comm(const H5FD_t *file) +{ + const H5FD_class_mpi_t *cls=(const H5FD_class_mpi_t *)(file->cls); + MPI_Comm ret_value; + + FUNC_ENTER_NOAPI(H5FD_mpi_get_comm, MPI_COMM_NULL) + + assert(file && cls); + assert(cls->get_comm); /* All MPI drivers must implement this */ + + /* Dispatch to driver (MPI_Comm is an opaque handle, so failure is MPI_COMM_NULL, not a negative value) */ + if ((ret_value=(cls->get_comm)(file))==MPI_COMM_NULL) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_COMM_NULL, "driver get_comm request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_get_comm() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_MPIOff_to_haddr + * + * Purpose: Convert an MPI_Offset value to haddr_t. + * + * Return: Success: The haddr_t equivalent of the MPI_OFF + * argument. + * + * Failure: HADDR_UNDEF + * + * Programmer: Unknown + * January 30, 1998 + * + * Modifications: + * Robb Matzke, 1999-04-23 + * An error is reported for address overflows. The ADDR output + * argument is optional. + * + * Robb Matzke, 1999-08-06 + * Modified to work with the virtual file layer.
+ *------------------------------------------------------------------------- + */ +haddr_t +H5FD_mpi_MPIOff_to_haddr(MPI_Offset mpi_off) +{ + haddr_t ret_value=HADDR_UNDEF; + + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_mpi_MPIOff_to_haddr) + + if (mpi_off != (MPI_Offset)(haddr_t)mpi_off) + ret_value=HADDR_UNDEF; + else + ret_value=(haddr_t)mpi_off; + + FUNC_LEAVE_NOAPI(ret_value) +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_haddr_to_MPIOff + * + * Purpose: Convert an haddr_t value to MPI_Offset. + * + * Return: Success: Non-negative, the MPI_OFF argument contains + * the converted value. + * + * Failure: Negative, MPI_OFF is undefined. + * + * Programmer: Unknown + * January 30, 1998 + * + * Modifications: + * Robb Matzke, 1999-04-23 + * An error is reported for address overflows. The ADDR output + * argument is optional. + * + * Robb Matzke, 1999-07-28 + * The ADDR argument is passed by value. + * + * Robb Matzke, 1999-08-06 + * Modified to work with the virtual file layer. + *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpi_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off/*out*/) +{ + herr_t ret_value=FAIL; + + FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_mpi_haddr_to_MPIOff) + + assert(mpi_off); + + /* Convert the HDF5 address into an MPI offset */ + *mpi_off = (MPI_Offset)addr; + + if (addr != (haddr_t)((MPI_Offset)addr)) + ret_value=FAIL; + else + ret_value=SUCCEED; + + FUNC_LEAVE_NOAPI(ret_value) +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_comm_info_dup + * + * Purpose: Make duplicates of communicator and Info object. + * If the Info object is in fact MPI_INFO_NULL, no duplicate + * is made but the same value assigned to the new Info object + * handle. + * + * Return: Success: Non-negative. The new communicator and Info + * object handles are returned via comm_new and + * info_new pointers. 
+ * + * Failure: Negative. + * + * Programmer: Albert Cheng + * Jan 8, 2003 + * + * Modifications: + *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpi_comm_info_dup(MPI_Comm comm, MPI_Info info, MPI_Comm *comm_new, MPI_Info *info_new) +{ + herr_t ret_value=SUCCEED; + MPI_Comm comm_dup=MPI_COMM_NULL; + MPI_Info info_dup=MPI_INFO_NULL; + int mpi_code; + + FUNC_ENTER_NOAPI(H5FD_mpi_comm_info_dup, FAIL) + + /* Check arguments */ + if (MPI_COMM_NULL == comm) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "not a valid argument") + if (!comm_new || !info_new) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "bad pointers") + + /* Dup them. Using temporary variables for error recovery cleanup. */ + if (MPI_SUCCESS != (mpi_code=MPI_Comm_dup(comm, &comm_dup))) + HMPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code) + if (MPI_INFO_NULL != info){ + if (MPI_SUCCESS != (mpi_code=MPI_Info_dup(info, &info_dup))) + HMPI_GOTO_ERROR(FAIL, "MPI_Info_dup failed", mpi_code) + }else{ + /* No dup, just copy it. */ + info_dup = info; + } + + /* copy them to the return arguments */ + *comm_new = comm_dup; + *info_new = info_dup; + +done: + if (FAIL == ret_value){ + /* need to free anything created here */ + if (MPI_COMM_NULL != comm_dup) + MPI_Comm_free(&comm_dup); + if (MPI_INFO_NULL != info_dup) + MPI_Info_free(&info_dup); + } + + FUNC_LEAVE_NOAPI(ret_value) +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_comm_info_free + * + * Purpose: Free the communicator and Info object. + * If comm or info is in fact MPI_COMM_NULL or MPI_INFO_NULL + * respectively, no action occurs to it. + * + * Return: Success: Non-negative. The values the pointers refer + * to will be set to the corresponding NULL + * handles. + * + * Failure: Negative. 
+ * + * Programmer: Albert Cheng + * Jan 8, 2003 + * + * Modifications: + *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpi_comm_info_free(MPI_Comm *comm, MPI_Info *info) +{ + herr_t ret_value=SUCCEED; + FUNC_ENTER_NOAPI(H5FD_mpi_comm_info_free, FAIL) + + /* Check arguments */ + if (!comm || !info) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "not a valid argument") + + if (MPI_COMM_NULL != *comm) + MPI_Comm_free(comm); + if (MPI_INFO_NULL != *info) + MPI_Info_free(info); + +done: + FUNC_LEAVE_NOAPI(ret_value) +} + +#ifdef NOT_YET + +/*------------------------------------------------------------------------- + * Function: H5FD_mpio_wait_for_left_neighbor + * + * Purpose: Blocks until (empty) msg is received from immediately + * lower-rank neighbor. In conjunction with + * H5FD_mpio_signal_right_neighbor, useful for enforcing + * 1-process-at-a-time access to critical regions to avoid race + * conditions (though it is overkill to require that the + * processes be allowed to proceed strictly in order of their + * rank). + * + * Note: This routine doesn't read or write any file, just performs + * interprocess coordination. It really should reside in a + * separate package of such routines. + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: rky + * 19981207 + * + * Modifications: + * Robb Matzke, 1999-08-09 + * Modified to work with the virtual file layer.
+ *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpio_wait_for_left_neighbor(H5FD_t *_file) +{ + H5FD_mpio_t *file = (H5FD_mpio_t*)_file; + char msgbuf[1]; + MPI_Status rcvstat; + int mpi_code; /* mpi return code */ + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpio_wait_for_left_neighbor, FAIL) + + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); + + /* Portably initialize MPI status variable */ + HDmemset(&rcvstat,0,sizeof(MPI_Status)); + + /* p0 has no left neighbor; all other procs wait for msg */ + if (file->mpi_rank != 0) { + if (MPI_SUCCESS != (mpi_code=MPI_Recv( &msgbuf, 1, MPI_CHAR, + file->mpi_rank-1, MPI_ANY_TAG, file->comm, &rcvstat ))) + HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mpi_code) + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpio_signal_right_neighbor + * + * Purpose: Sends an (empty) msg to the immediately higher-rank + * neighbor (the highest rank sends nothing). In conjunction with + * H5FD_mpio_wait_for_left_neighbor, useful for enforcing + * 1-process-at-a-time access to critical regions to avoid race + * conditions (though it is overkill to require that the + * processes be allowed to proceed strictly in order of their + * rank). + * + * Note: This routine doesn't read or write any file, just performs + * interprocess coordination. It really should reside in a + * separate package of such routines. + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: rky + * 19981207 + * + * Modifications: + * Robb Matzke, 1999-08-09 + * Modified to work with the virtual file layer.
+ *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpio_signal_right_neighbor(H5FD_t *_file) +{ + H5FD_mpio_t *file = (H5FD_mpio_t*)_file; + char msgbuf[1]; + int mpi_code; /* mpi return code */ + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpio_signal_right_neighbor, FAIL) + + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); + + if (file->mpi_rank != (file->mpi_size-1)) { + if (MPI_SUCCESS != (mpi_code=MPI_Send(&msgbuf, 0/*empty msg*/, MPI_CHAR, + file->mpi_rank+1, 0, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mpi_code) + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} +#endif /* NOT_YET */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_setup_collective + * + * Purpose: Set the buffer type BTYPE, file type FTYPE for a data + * transfer. Also request a MPI type transfer. + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Robb Matzke + * Monday, August 9, 1999 + * + * Modifications: + * + * Quincey Koziol - 2002/06/17 + * Removed 'disp' parameter, read & write routines will use + * the address of the dataset in MPI_File_set_view() calls, as + * necessary. + * + * Quincey Koziol - 2002/06/17 + * Changed to set temporary properties in a dxpl, instead of + * flags in the file struct, which will make this more threadsafe. 
+ * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpi_setup_collective(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype) +{ + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpi_setup_collective, FAIL) + + /* Check arguments */ + if(NULL == (plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER))) + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") + + /* Set buffer MPI type */ + if(H5P_insert(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,H5FD_MPI_XFER_MEM_MPI_TYPE_SIZE,&btype,NULL,NULL,NULL,NULL,NULL,NULL)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") + + /* Set file MPI type */ + if(H5P_insert(plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME,H5FD_MPI_XFER_FILE_MPI_TYPE_SIZE,&ftype,NULL,NULL,NULL,NULL,NULL,NULL)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_setup_collective() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpi_teardown_collective + * + * Purpose: Remove the temporary MPI-I/O properties from dxpl. 
+ * + * Return: Success: Non-negative + * Failure: Negative + * + * Programmer: Quincey Koziol + * Monday, June 17, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_mpi_teardown_collective(hid_t dxpl_id) +{ + H5P_genplist_t *plist; /* Property list pointer */ + herr_t ret_value=SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpi_teardown_collective, FAIL) + + /* Check arguments */ + if(NULL == (plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER))) + HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") + + /* Remove buffer MPI type */ + if(H5P_remove(dxpl_id,plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") + + /* Remove file MPI type */ + if(H5P_remove(dxpl_id,plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME)<0) + HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_teardown_collective() */ + +#endif /* H5_HAVE_PARALLEL */ + diff --git a/src/H5FDmpi.h b/src/H5FDmpi.h new file mode 100644 index 0000000..0de063a --- /dev/null +++ b/src/H5FDmpi.h @@ -0,0 +1,93 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. 
If you do not have * + * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Quincey Koziol <koziol@ncsa.uiuc.edu> + * Friday, January 30, 2004 + * + * Purpose: The public header file for common items for all MPI VFL drivers + */ +#ifndef H5FDmpi_H +#define H5FDmpi_H + +#ifdef H5_HAVE_PARALLEL + +/* Type of I/O for data transfer properties */ +typedef enum H5FD_mpio_xfer_t { + H5FD_MPIO_INDEPENDENT = 0, /*zero is the default*/ + H5FD_MPIO_COLLECTIVE +} H5FD_mpio_xfer_t; + +/* Sub-class the H5FD_class_t to add more specific functions for MPI-based VFDs */ +typedef struct H5FD_class_mpi_t { + H5FD_class_t super; /* Superclass information & methods */ + int (*get_rank)(const H5FD_t *file); /* Get the MPI rank of a process */ + int (*get_size)(const H5FD_t *file); /* Get the MPI size of a communicator */ + MPI_Comm (*get_comm)(const H5FD_t *file); /* Get the communicator for a file */ +} H5FD_class_mpi_t; + +/* Include all the MPI VFL headers */ +#include "H5FDfphdf5.h" /* Flexible PHDF5 file driver */ +#include "H5FDmpio.h" /* MPI I/O file driver */ +#include "H5FDmpiposix.h" /* MPI/posix I/O file driver */ + +/* + * The view is set to this value + */ +extern char H5FD_mpi_native_g[]; + +/* Macros */ + +/* Single macro to check for all file drivers that use MPI */ +#define IS_H5FD_MPI(file) \ + (IS_H5FD_MPIO(file) || IS_H5FD_MPIPOSIX(file) || IS_H5FD_FPHDF5(file)) + +/* ======== Temporary data transfer properties ======== */ +/* Definitions for memory MPI type property */ +#define H5FD_MPI_XFER_MEM_MPI_TYPE_NAME "H5FD_mpi_mem_mpi_type" +#define H5FD_MPI_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype) +/* Definitions for file MPI type property */ +#define H5FD_MPI_XFER_FILE_MPI_TYPE_NAME "H5FD_mpi_file_mpi_type" +#define H5FD_MPI_XFER_FILE_MPI_TYPE_SIZE sizeof(MPI_Datatype) + +/* Function prototypes */ +#ifdef __cplusplus +extern "C" { +#endif +/* 
General routines */ +H5_DLL haddr_t H5FD_mpi_MPIOff_to_haddr(MPI_Offset mpi_off); +H5_DLL herr_t H5FD_mpi_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off/*out*/); +H5_DLL herr_t H5FD_mpi_comm_info_dup(MPI_Comm comm, MPI_Info info, + MPI_Comm *comm_new, MPI_Info *info_new); +H5_DLL herr_t H5FD_mpi_comm_info_free(MPI_Comm *comm, MPI_Info *info); +#ifdef NOT_YET +H5_DLL herr_t H5FD_mpio_wait_for_left_neighbor(H5FD_t *file); +H5_DLL herr_t H5FD_mpio_signal_right_neighbor(H5FD_t *file); +#endif /* NOT_YET */ +H5_DLL herr_t H5FD_mpi_setup_collective(hid_t dxpl_id, MPI_Datatype btype, + MPI_Datatype ftype); +H5_DLL herr_t H5FD_mpi_teardown_collective(hid_t dxpl_id); + +/* Driver specific methods */ +H5_DLL int H5FD_mpi_get_rank(const H5FD_t *file); +H5_DLL int H5FD_mpi_get_size(const H5FD_t *file); +H5_DLL MPI_Comm H5FD_mpi_get_comm(const H5FD_t *_file); +#ifdef __cplusplus +} +#endif + +#endif /* H5_HAVE_PARALLEL */ + +#endif /* H5FDmpi_H */ + diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 0a62158..6c5d50e 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -30,7 +30,7 @@ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* File access */ #include "H5FDprivate.h" /* File drivers */ -#include "H5FDmpio.h" /* MPI I/O file driver */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5Pprivate.h" /* Property lists */ @@ -57,14 +57,13 @@ typedef struct H5FD_mpio_t { MPI_Info info; /*file information */ int mpi_rank; /* This process's rank */ int mpi_size; /* Total number of processes */ + hbool_t truncate_pending; /* Whether a file truncation is pending */ haddr_t eof; /*end-of-file marker */ haddr_t eoa; /*end-of-address marker */ haddr_t last_eoa; /* Last known end-of-address marker */ } H5FD_mpio_t; -/* Prototypes */ -static haddr_t H5FD_mpio_MPIOff_to_haddr(MPI_Offset mpi_off); -static herr_t H5FD_mpio_haddr_to_MPIOff(haddr_t addr, MPI_Offset 
*mpi_off/*out*/); +/* Private Prototypes */ /* Callbacks */ static void *H5FD_mpio_fapl_get(H5FD_t *_file); @@ -83,9 +82,9 @@ static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, hadd static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf); static herr_t H5FD_mpio_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing); -static herr_t H5FD_mpio_comm_info_dup(MPI_Comm comm, MPI_Info info, - MPI_Comm *comm_new, MPI_Info *info_new); -static herr_t H5FD_mpio_comm_info_free(MPI_Comm *comm, MPI_Info *info); +static int H5FD_mpio_mpi_rank(const H5FD_t *_file); +static int H5FD_mpio_mpi_size(const H5FD_t *_file); +static MPI_Comm H5FD_mpio_communicator(const H5FD_t *_file); /* MPIO-specific file access properties */ typedef struct H5FD_mpio_fapl_t { @@ -94,7 +93,8 @@ typedef struct H5FD_mpio_fapl_t { } H5FD_mpio_fapl_t; /* The MPIO file driver information */ -static const H5FD_class_t H5FD_mpio_g = { +static const H5FD_class_mpi_t H5FD_mpio_g = { + { /* Start of superclass information */ "mpio", /*name */ HADDR_MAX, /*maxaddr */ H5F_CLOSE_SEMI, /* fc_degree */ @@ -124,6 +124,10 @@ static const H5FD_class_t H5FD_mpio_g = { NULL, /*lock */ NULL, /*unlock */ H5FD_FLMAP_SINGLE /*fl_map */ + }, /* End of superclass information */ + H5FD_mpio_mpi_rank, /*get_rank */ + H5FD_mpio_mpi_size, /*get_size */ + H5FD_mpio_communicator /*get_comm */ }; #ifdef H5FDmpio_DEBUG @@ -146,29 +150,10 @@ static int H5FD_mpio_Debug[256] = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #endif -#ifdef OLD_METADATA_WRITE -/* Global var to allow elimination of redundant metadata writes - * to be controlled by the value of an environment variable. 
*/ -/* Use the elimination by default unless this is the Intel Red machine */ -#ifndef __PUMAGON__ -hbool_t H5_mpi_1_metawrite_g = TRUE; -#else -hbool_t H5_mpi_1_metawrite_g = FALSE; -#endif -#endif /* OLD_METADATA_WRITE */ - /* Interface initialization */ #define INTERFACE_INIT H5FD_mpio_init static int interface_initialize_g = 0; -/* ======== Temporary, Local data transfer properties ======== */ -/* Definitions for memory MPI type property */ -#define H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME "H5FD_mpio_mem_mpi_type" -#define H5FD_MPIO_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype) -/* Definitions for file MPI type property */ -#define H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME "H5FD_mpio_file_mpi_type" -#define H5FD_MPIO_XFER_FILE_MPI_TYPE_SIZE sizeof(MPI_Datatype) - /*------------------------------------------------------------------------- * Function: H5FD_mpio_init @@ -198,7 +183,7 @@ H5FD_mpio_init(void) FUNC_ENTER_NOAPI(H5FD_mpio_init, FAIL) if (H5I_VFL!=H5Iget_type(H5FD_MPIO_g)) - H5FD_MPIO_g = H5FDregister(&H5FD_mpio_g); + H5FD_MPIO_g = H5FD_register((const H5FD_class_t *)&H5FD_mpio_g,sizeof(H5FD_class_mpi_t)); #ifdef H5FDmpio_DEBUG if (!H5FD_mpio_Debug_inited) @@ -504,304 +489,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_mpio_communicator - * - * Purpose: Returns the MPI communicator for the file. 
- * - * Return: Success: The communicator - * - * Failure: NULL - * - * Programmer: Robb Matzke - * Monday, August 9, 1999 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -MPI_Comm -H5FD_mpio_communicator(H5FD_t *_file) -{ - H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - MPI_Comm ret_value; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_communicator, MPI_COMM_NULL) - - assert(file); - assert(H5FD_MPIO==file->pub.driver_id); - - /* Set return value */ - ret_value=file->comm; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_mpi_rank - * - * Purpose: Returns the MPI rank for a process - * - * Return: Success: non-negative - * Failure: negative - * - * Programmer: Quincey Koziol - * Thursday, May 16, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -int -H5FD_mpio_mpi_rank(H5FD_t *_file) -{ - H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - int ret_value; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_mpi_rank, FAIL) - - assert(file); - assert(H5FD_MPIO==file->pub.driver_id); - - /* Set return value */ - ret_value=file->mpi_rank; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_mpio_mpi_rank() */ - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_mpi_size - * - * Purpose: Returns the number of MPI processes - * - * Return: Success: non-negative - * Failure: negative - * - * Programmer: Quincey Koziol - * Thursday, May 16, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -int -H5FD_mpio_mpi_size(H5FD_t *_file) -{ - H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - int ret_value; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_mpi_size, FAIL) - - assert(file); - assert(H5FD_MPIO==file->pub.driver_id); - - /* Set return value */ - 
ret_value=file->mpi_size; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_mpio_mpi_size() */ - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_setup - * - * Purpose: Set the buffer type BTYPE, file type FTYPE for a data - * transfer. Also request a MPI type transfer. - * - * Return: Success: 0 - * Failure: -1 - * - * Programmer: Robb Matzke - * Monday, August 9, 1999 - * - * Modifications: - * - * Quincey Koziol - 2002/06/17 - * Removed 'disp' parameter, read & write routines will use - * the address of the dataset in MPI_File_set_view() calls, as - * necessary. - * - * Quincey Koziol - 2002/06/17 - * Changed to set temporary properties in a dxpl, instead of - * flags in the file struct, which will make this more threadsafe. - * - *------------------------------------------------------------------------- - */ -herr_t -H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype) -{ - H5P_genplist_t *plist; /* Property list pointer */ - herr_t ret_value=SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_setup, FAIL) - - /* Check arguments */ - if(NULL == (plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER))) - HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") - - /* Set buffer MPI type */ - if(H5P_insert(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,H5FD_MPIO_XFER_MEM_MPI_TYPE_SIZE,&btype,NULL,NULL,NULL,NULL,NULL,NULL)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") - - /* Set file MPI type */ - if(H5P_insert(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,H5FD_MPIO_XFER_FILE_MPI_TYPE_SIZE,&ftype,NULL,NULL,NULL,NULL,NULL,NULL)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_teardown - * - * Purpose: Remove the temporary MPI-I/O properties from dxpl. 
- * - * Return: Success: Non-negative - * Failure: Negative - * - * Programmer: Quincey Koziol - * Monday, June 17, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -herr_t -H5FD_mpio_teardown(hid_t dxpl_id) -{ - H5P_genplist_t *plist; /* Property list pointer */ - herr_t ret_value=SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_teardown, FAIL) - - /* Check arguments */ - if(NULL == (plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER))) - HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dataset transfer list") - - /* Remove buffer MPI type */ - if(H5P_remove(dxpl_id,plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") - - /* Remove file MPI type */ - if(H5P_remove(dxpl_id,plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_wait_for_left_neighbor - * - * Purpose: Blocks until (empty) msg is received from immediately - * lower-rank neighbor. In conjunction with - * H5FD_mpio_signal_right_neighbor, useful for enforcing - * 1-process-at-at-time access to critical regions to avoid race - * conditions (though it is overkill to require that the - * processes be allowed to proceed strictly in order of their - * rank). - * - * Note: This routine doesn't read or write any file, just performs - * interprocess coordination. It really should reside in a - * separate package of such routines. - * - * Return: Success: 0 - * Failure: -1 - * - * Programmer: rky - * 19981207 - * - * Modifications: - * Robb Matzke, 1999-08-09 - * Modified to work with the virtual file layer. 
- *------------------------------------------------------------------------- - */ -herr_t -H5FD_mpio_wait_for_left_neighbor(H5FD_t *_file) -{ - H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - char msgbuf[1]; - MPI_Status rcvstat; - int mpi_code; /* mpi return code */ - herr_t ret_value=SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_wait_for_left_neighbor, FAIL) - - assert(file); - assert(H5FD_MPIO==file->pub.driver_id); - - /* Portably initialize MPI status variable */ - HDmemset(&rcvstat,0,sizeof(MPI_Status)); - - /* p0 has no left neighbor; all other procs wait for msg */ - if (file->mpi_rank != 0) { - if (MPI_SUCCESS != (mpi_code=MPI_Recv( &msgbuf, 1, MPI_CHAR, - file->mpi_rank-1, MPI_ANY_TAG, file->comm, &rcvstat ))) - HMPI_GOTO_ERROR(FAIL, "MPI_Recv failed", mpi_code) - } - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_signal_right_neighbor - * - * Purpose: Blocks until (empty) msg is received from immediately - * lower-rank neighbor. In conjunction with - * H5FD_mpio_wait_for_left_neighbor, useful for enforcing - * 1-process-at-at-time access to critical regions to avoid race - * conditions (though it is overkill to require that the - * processes be allowed to proceed strictly in order of their - * rank). - * - * Note: This routine doesn't read or write any file, just performs - * interprocess coordination. It really should reside in a - * separate package of such routines. - * - * Return: Success: 0 - * Failure: -1 - * - * Programmer: rky - * 19981207 - * - * Modifications: - * Robb Matzke, 1999-08-09 - * Modified to work with the virtual file layer. 
- *------------------------------------------------------------------------- - */ -herr_t -H5FD_mpio_signal_right_neighbor(H5FD_t *_file) -{ - H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - char msgbuf[1]; - int mpi_code; /* mpi return code */ - herr_t ret_value=SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpio_signal_right_neighbor, FAIL) - - assert(file); - assert(H5FD_MPIO==file->pub.driver_id); - - if (file->mpi_rank != (file->mpi_size-1)) { - if (MPI_SUCCESS != (mpi_code=MPI_Send(&msgbuf, 0/*empty msg*/, MPI_CHAR, - file->mpi_rank+1, 0, file->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Send failed", mpi_code) - } - -done: - FUNC_LEAVE_NOAPI(ret_value) -} - - -/*------------------------------------------------------------------------- * Function: H5FD_mpio_fapl_get * * Purpose: Returns a file access property list which could be used to @@ -837,7 +524,7 @@ H5FD_mpio_fapl_get(H5FD_t *_file) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed") /* Duplicate communicator and Info object. */ - if (FAIL==H5FD_mpio_comm_info_dup(file->comm, file->info, + if (FAIL==H5FD_mpi_comm_info_dup(file->comm, file->info, &fa->comm, &fa->info)) HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") @@ -885,7 +572,7 @@ fprintf(stderr, "enter H5FD_mpio_fapl_copy\n"); HDmemcpy(new_fa, old_fa, sizeof(H5FD_mpio_fapl_t)); /* Duplicate communicator and Info object. 
*/ - if (FAIL==H5FD_mpio_comm_info_dup(old_fa->comm, old_fa->info, + if (FAIL==H5FD_mpi_comm_info_dup(old_fa->comm, old_fa->info, &new_fa->comm, &new_fa->info)) HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") ret_value = new_fa; @@ -936,7 +623,7 @@ fprintf(stderr, "in H5FD_mpio_fapl_free\n"); /* Free the internal communicator and INFO object */ assert(MPI_COMM_NULL!=fa->comm); - H5FD_mpio_comm_info_free(&fa->comm, &fa->info); + H5FD_mpi_comm_info_free(&fa->comm, &fa->info); H5MM_xfree(fa); done: @@ -1008,10 +695,9 @@ H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id, const H5FD_mpio_fapl_t *fa=NULL; H5FD_mpio_fapl_t _fa; H5P_genplist_t *plist; /* Property list pointer */ - H5FD_t *ret_value; /* Return value */ MPI_Comm comm_dup=MPI_COMM_NULL; MPI_Info info_dup=MPI_INFO_NULL; - + H5FD_t *ret_value; /* Return value */ FUNC_ENTER_NOAPI(H5FD_mpio_open, NULL) @@ -1035,7 +721,7 @@ H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id, } /* Duplicate communicator and Info object for use by this file. 
*/ - if (FAIL==H5FD_mpio_comm_info_dup(fa->comm, fa->info, &comm_dup, &info_dup)) + if (FAIL==H5FD_mpi_comm_info_dup(fa->comm, fa->info, &comm_dup, &info_dup)) HGOTO_ERROR(H5E_INTERNAL, H5E_CANTCOPY, NULL, "Communicator/Info duplicate failed") /* convert HDF5 flags to MPI-IO flags */ @@ -1065,8 +751,7 @@ H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id, #endif /*OKAY: CAST DISCARDS CONST*/ - mpi_code=MPI_File_open(comm_dup, (char*)name, mpi_amode, info_dup, &fh); - if (MPI_SUCCESS != mpi_code) + if (MPI_SUCCESS != (mpi_code=MPI_File_open(comm_dup, (char*)name, mpi_amode, info_dup, &fh))) HMPI_GOTO_ERROR(NULL, "MPI_File_open failed", mpi_code) file_opened=1; @@ -1076,40 +761,64 @@ H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id, if (MPI_SUCCESS != (mpi_code=MPI_Comm_size (comm_dup, &mpi_size))) HMPI_GOTO_ERROR(NULL, "MPI_Comm_size failed", mpi_code) -/* Following changes in handling file-truncation made be rkyates and ppweidhaas, sep 99 */ - - /* Only processor p0 will get the filesize and broadcast it. */ - if (mpi_rank == 0) { - /* Get current file size */ - if (MPI_SUCCESS != (mpi_code=MPI_File_get_size(fh, &size))) - HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code) - } - - /* Broadcast file-size */ - if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE, 0, comm_dup))) - HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) - - /* Only if size > 0, truncate the file - if requested */ - if (size && (flags & H5F_ACC_TRUNC)) { - if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(fh, (MPI_Offset)0))) - HMPI_GOTO_ERROR(NULL, "MPI_File_set_size failed", mpi_code) - - /* Don't let any proc return until all have truncated the file. 
*/ - if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(comm_dup))) - HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mpi_code) - size = 0; - } - /* Build the return value and initialize it */ if (NULL==(file=H5MM_calloc(sizeof(H5FD_mpio_t)))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed") - file->f = fh; file->comm = comm_dup; file->info = info_dup; file->mpi_rank = mpi_rank; file->mpi_size = mpi_size; - file->eof = H5FD_mpio_MPIOff_to_haddr(size); + + /* Determine if the file should be truncated */ + if(flags & H5F_ACC_TRUNC) { +#ifdef H5_MPI_FILE_SET_SIZE_BIG + /* Indicate that a 'truncate' operation is pending on the file */ + file->truncate_pending=TRUE; + + /* File is treated as zero size now */ + size=0; +#else /* H5_MPI_FILE_SET_SIZE_BIG */ + /* Only processor p0 will get the filesize and broadcast it. */ + if (mpi_rank == 0) { + /* Get current file size */ + if (MPI_SUCCESS != (mpi_code=MPI_File_get_size(fh, &size))) + HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code) + } /* end if */ + + /* Broadcast file-size */ + if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE, 0, comm_dup))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + + /* Only truncate the file if it is non-zero length */ + if(size) { + if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(fh, (MPI_Offset)0))) + HMPI_GOTO_ERROR(NULL, "MPI_File_set_size failed", mpi_code) + + /* Don't let any proc return until all have truncated the file. */ + if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(comm_dup))) + HMPI_GOTO_ERROR(NULL, "MPI_Barrier failed", mpi_code) + + /* File is zero size now */ + size = 0; + } /* end if */ +#endif /* H5_MPI_FILE_SET_SIZE_BIG */ + } /* end if */ + else { + /* Only processor p0 will get the filesize and broadcast it. 
*/ + if (mpi_rank == 0) { + /* Get current file size */ + if (MPI_SUCCESS != (mpi_code=MPI_File_get_size(fh, &size))) + HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code) + } /* end if */ + + /* Broadcast file size */ + if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE, 0, comm_dup))) + HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code) + } /* end else */ + + /* Set the size of the file (from library's perspective) */ + file->eof = H5FD_mpi_MPIOff_to_haddr(size); /* Set return value */ ret_value=(H5FD_t*)file; @@ -1161,8 +870,8 @@ static herr_t H5FD_mpio_close(H5FD_t *_file) { H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - int mpi_code; /* mpi return code */ - herr_t ret_value=SUCCEED; /* Return value */ + int mpi_code; /* MPI return code */ + herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(H5FD_mpio_close, FAIL) @@ -1172,13 +881,32 @@ H5FD_mpio_close(H5FD_t *_file) #endif assert(file); assert(H5FD_MPIO==file->pub.driver_id); + assert(file->eoa>0); + +#ifdef H5_MPI_FILE_SET_SIZE_BIG + /* Check if we should truncate the file */ + if(file->truncate_pending) { + MPI_Offset mpi_off; /* Offset to write test data at */ + + /* Some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff(file->eoa, &mpi_off)<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") + + /* Truncate the extra data off the end */ + /* (Don't worry about a barrier after the call, since we are closing) */ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) + HMPI_DONE_ERROR(NULL, "MPI_File_set_size failed", mpi_code) + } /* end if */ +#else /* H5_MPI_FILE_SET_SIZE_BIG */ + assert(!file->truncate_pending); +#endif /* H5_MPI_FILE_SET_SIZE_BIG */ /* MPI_File_close sets argument to MPI_FILE_NULL */ if (MPI_SUCCESS != (mpi_code=MPI_File_close(&(file->f)/*in,out*/))) HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code) /* Clean up other stuff */ - H5FD_mpio_comm_info_free(&file->comm, &file->info); + 
H5FD_mpi_comm_info_free(&file->comm, &file->info); H5MM_xfree(file); done: @@ -1490,7 +1218,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add HDmemset(&mpi_stat,0,sizeof(MPI_Status)); /* some numeric conversions */ - if (H5FD_mpio_haddr_to_MPIOff(addr, &mpi_off/*out*/)<0) + if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off/*out*/)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") size_i = (int)size; if ((hsize_t)size_i != size) @@ -1525,16 +1253,16 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add use_view_this_time=TRUE; /* prepare for a full-blown xfer using btype, ftype, and disp */ - if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) + if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) + if(H5P_get(plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") /* * Set the file view when we are using MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info))) + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) /* When using types, use the address as the displacement for @@ -1557,7 +1285,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add * Reset the file view when we used MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, 
file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) } else { if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) @@ -1754,7 +1482,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HDmemset(&mpi_stat,0,sizeof(MPI_Status)); /* some numeric conversions */ - if (H5FD_mpio_haddr_to_MPIOff(addr, &mpi_off)<0) + if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") size_i = (int)size; if ((hsize_t)size_i != size) @@ -1789,16 +1517,16 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, use_view_this_time=TRUE; /* prepare for a full-blown xfer using btype, ftype, and disp */ - if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) + if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) + if(H5P_get(plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") /* * Set the file view when we are using MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info))) + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) /* When using types, use the address as the displacement for @@ -1828,36 +1556,17 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) -#ifdef OLD_METADATA_WRITE - /* Only p<round> will do the actual write if all procs in comm write same metadata */ - if 
(H5_mpi_1_metawrite_g) { - if (file->mpi_rank != H5_PAR_META_WRITE) { + /* Only one process will do the actual write if all procs in comm write same metadata */ + if (file->mpi_rank != H5_PAR_META_WRITE) { #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'w']) { - fprintf(stdout, - " proc %d: in H5FD_mpio_write (write omitted)\n", - file->mpi_rank ); - } -#endif - HGOTO_DONE(SUCCEED) /* skip the actual write */ + if (H5FD_mpio_Debug[(int)'w']) { + fprintf(stdout, + " proc %d: in H5FD_mpio_write (write omitted)\n", + file->mpi_rank ); } - } -#else /* OLD_METADATA_WRITE */ - /* Remember that views are used */ - use_view_this_time=TRUE; - - /* - * Set the file view when we are using MPI derived types - */ - /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - - /* When using types, use the address as the displacement for - * MPI_File_set_view and reset the address for the read to zero - */ - mpi_off=0; -#endif /* OLD_METADATA_WRITE */ +#endif + HGOTO_DONE(SUCCEED) /* skip the actual write */ + } /* end if */ } /* end if */ /* Write the data. 
*/ @@ -1874,7 +1583,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, * Reset the file view when we used MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) } else { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ @@ -1906,20 +1615,17 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, file->eof = HADDR_UNDEF; done: -#ifdef OLD_METADATA_WRITE - /* if only p<round> writes, need to broadcast the ret_value to other processes */ - if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) { + /* if only one process writes, need to broadcast the ret_value to other processes */ + if (type!=H5FD_MEM_DRAW) { if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ -#endif /* OLD_METADATA_WRITE */ #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) fprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n", file->mpi_rank, ret_value ); #endif - FUNC_LEAVE_NOAPI(ret_value) } @@ -1960,10 +1666,6 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) int mpi_code; /* mpi return code */ MPI_Offset mpi_off; herr_t ret_value=SUCCEED; -#ifndef H5_MPI_FILE_SET_SIZE_BIG - uint8_t byte=0; - MPI_Status mpi_stat; -#endif /* H5_MPI_FILE_SET_SIZE_BIG */ FUNC_ENTER_NOAPI(H5FD_mpio_flush, FAIL) @@ -1974,26 +1676,30 @@ H5FD_mpio_flush(H5FD_t *_file, hid_t UNUSED dxpl_id, unsigned closing) assert(file); assert(H5FD_MPIO==file->pub.driver_id); -#ifndef H5_MPI_FILE_SET_SIZE_BIG - /* Portably initialize MPI status variable */ - HDmemset(&mpi_stat,0,sizeof(MPI_Status)); -#endif /* H5_MPI_FILE_SET_SIZE_BIG */ - 
/* Extend the file to make sure it's large enough, then sync. * Unfortunately, keeping track of EOF is an expensive operation, so * we can't just check whether EOF<EOA like with other drivers. * Therefore we'll just read the byte at EOA-1 and then write it back. */ if(file->eoa>file->last_eoa) { #ifdef H5_MPI_FILE_SET_SIZE_BIG - if (H5FD_mpio_haddr_to_MPIOff(file->eoa, &mpi_off)<0) + if (H5FD_mpi_haddr_to_MPIOff(file->eoa, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset") /* Extend the file's size */ if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code) + + /* File does not need to be truncated now */ + file->truncate_pending=FALSE; #else /* H5_MPI_FILE_SET_SIZE_BIG */ if (0==file->mpi_rank) { - if (H5FD_mpio_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0) + uint8_t byte=0; + MPI_Status mpi_stat; + + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat,0,sizeof(MPI_Status)); + + if (H5FD_mpi_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset") if (MPI_SUCCESS != (mpi_code=MPI_File_read_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) @@ -2032,201 +1738,105 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_mpio_MPIOff_to_haddr + * Function: H5FD_mpio_mpi_rank * - * Purpose: Convert an MPI_Offset value to haddr_t. + * Purpose: Returns the MPI rank for a process * - * Return: Success: The haddr_t equivalent of the MPI_OFF - * argument. - * - * Failure: HADDR_UNDEF + * Return: Success: non-negative + * Failure: negative * - * Programmer: Unknown - * January 30, 1998 + * Programmer: Quincey Koziol + * Thursday, May 16, 2002 * * Modifications: - * Robb Matzke, 1999-04-23 - * An error is reported for address overflows. 
The ADDR output - * argument is optional. * - * Robb Matzke, 1999-08-06 - * Modified to work with the virtual file layer. - *------------------------------------------------------------------------- + *------------------------------------------------------------------------- */ -static haddr_t -H5FD_mpio_MPIOff_to_haddr(MPI_Offset mpi_off) +static int +H5FD_mpio_mpi_rank(const H5FD_t *_file) { - haddr_t ret_value=HADDR_UNDEF; + const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file; + int ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpio_mpi_rank, FAIL) - FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_mpio_MPIOff_to_haddr) + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); - if (mpi_off != (MPI_Offset)(haddr_t)mpi_off) - ret_value=HADDR_UNDEF; - else - ret_value=(haddr_t)mpi_off; + /* Set return value */ + ret_value=file->mpi_rank; +done: FUNC_LEAVE_NOAPI(ret_value) -} +} /* end H5FD_mpio_mpi_rank() */ /*------------------------------------------------------------------------- - * Function: H5FD_mpio_haddr_to_MPIOff - * - * Purpose: Convert an haddr_t value to MPI_Offset. + * Function: H5FD_mpio_mpi_size * - * Return: Success: Non-negative, the MPI_OFF argument contains - * the converted value. + * Purpose: Returns the number of MPI processes * - * Failure: Negative, MPI_OFF is undefined. + * Return: Success: non-negative + * Failure: negative * - * Programmer: Unknown - * January 30, 1998 + * Programmer: Quincey Koziol + * Thursday, May 16, 2002 * * Modifications: - * Robb Matzke, 1999-04-23 - * An error is reported for address overflows. The ADDR output - * argument is optional. - * - * Robb Matzke, 1999-07-28 - * The ADDR argument is passed by value. * - * Robb Matzke, 1999-08-06 - * Modified to work with the virtual file layer. 
*------------------------------------------------------------------------- */ -static herr_t -H5FD_mpio_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off/*out*/) +static int +H5FD_mpio_mpi_size(const H5FD_t *_file) { - herr_t ret_value=FAIL; - - FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FD_mpio_haddr_to_MPIOff) - - if (mpi_off) - *mpi_off = (MPI_Offset)addr; - if (addr != (haddr_t)(MPI_Offset)addr) - ret_value=FAIL; - else - ret_value=SUCCEED; - - FUNC_LEAVE_NOAPI(ret_value) -} + const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file; + int ret_value; /* Return value */ - -/*------------------------------------------------------------------------- - * Function: H5FD_mpio_comm_info_dup - * - * Purpose: Make duplicates of communicator and Info object. - * If the Info object is in fact MPI_INFO_NULL, no duplicate - * is made but the same value assigned to the new Info object - * handle. - * - * Return: Success: Non-negative. The new communicator and Info - * object handles are returned via comm_new and - * info_new pointers. - * - * Failure: Negative. - * - * Programmer: Albert Cheng - * Jan 8, 2003 - * - * Modifications: - *------------------------------------------------------------------------- - */ -static herr_t -H5FD_mpio_comm_info_dup(MPI_Comm comm, MPI_Info info, MPI_Comm *comm_new, MPI_Info *info_new) -{ - herr_t ret_value=SUCCEED; - MPI_Comm comm_dup=MPI_COMM_NULL; - MPI_Info info_dup=MPI_INFO_NULL; - int mpi_code; - - FUNC_ENTER_NOAPI(H5FD_mpio_comm_info_dup, FAIL) + FUNC_ENTER_NOAPI(H5FD_mpio_mpi_size, FAIL) -#ifdef H5FDmpio_DEBUG -if (H5FD_mpio_Debug[(int)'t']) -fprintf(stderr, "In H5FD_mpio_comm_info_dup: argument comm/info = %d/%ld\n", comm, (long)info); -#endif - /* Check arguments */ - if (MPI_COMM_NULL == comm) - HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "not a valid argument") - if (!comm_new || !info_new) - HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "bad pointers") - - /* Dup them. Using temporary variables for error recovery cleanup. 
*/ - if (MPI_SUCCESS != (mpi_code=MPI_Comm_dup(comm, &comm_dup))) - HMPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code) - if (MPI_INFO_NULL != info){ - if (MPI_SUCCESS != (mpi_code=MPI_Info_dup(info, &info_dup))) - HMPI_GOTO_ERROR(FAIL, "MPI_Info_dup failed", mpi_code) - }else{ - /* No dup, just copy it. */ - info_dup = info; - } + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); - /* copy them to the return arguments */ - *comm_new = comm_dup; - *info_new = info_dup; + /* Set return value */ + ret_value=file->mpi_size; done: - if (FAIL == ret_value){ - /* need to free anything created here */ - if (MPI_COMM_NULL != comm_dup) - MPI_Comm_free(&comm_dup); - if (MPI_INFO_NULL != info_dup) - MPI_Info_free(&info_dup); - } - -#ifdef H5FDmpio_DEBUG -if (H5FD_mpio_Debug[(int)'t']) -fprintf(stderr, "Leaving H5FD_mpio_comm_info_dup\n"); -#endif FUNC_LEAVE_NOAPI(ret_value) -} +} /* end H5FD_mpio_mpi_size() */ /*------------------------------------------------------------------------- - * Function: H5FD_mpio_comm_info_free + * Function: H5FD_mpio_communicator * - * Purpose: Free the communicator and Info object. - * If comm or info is in fact MPI_COMM_NULL or MPI_INFO_NULL - * respectively, no action occurs to it. + * Purpose: Returns the MPI communicator for the file. * - * Return: Success: Non-negative. The values the pointers refer - * to will be set to the corresponding NULL - * handles. + * Return: Success: The communicator * - * Failure: Negative. 
+ * Failure: NULL * - * Programmer: Albert Cheng - * Jan 8, 2003 + * Programmer: Robb Matzke + * Monday, August 9, 1999 * * Modifications: + * *------------------------------------------------------------------------- */ -static herr_t -H5FD_mpio_comm_info_free(MPI_Comm *comm, MPI_Info *info) +static MPI_Comm +H5FD_mpio_communicator(const H5FD_t *_file) { - herr_t ret_value=SUCCEED; - FUNC_ENTER_NOAPI(H5FD_mpio_comm_info_free, FAIL) + const H5FD_mpio_t *file = (const H5FD_mpio_t*)_file; + MPI_Comm ret_value; /* Return value */ -#ifdef H5FDmpio_DEBUG -if (H5FD_mpio_Debug[(int)'t']) -fprintf(stderr, "in H5FD_mpio_comm_info_free\n"); -#endif - /* Check arguments */ - if (!comm || !info) - HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "not a valid argument") + FUNC_ENTER_NOAPI(H5FD_mpio_communicator, MPI_COMM_NULL) + + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); - if (MPI_COMM_NULL != *comm) - MPI_Comm_free(comm); - if (MPI_INFO_NULL != *info) - MPI_Info_free(info); + /* Set return value */ + ret_value=file->comm; done: -#ifdef H5FDmpio_DEBUG -if (H5FD_mpio_Debug[(int)'t']) -fprintf(stderr, "Leaving H5FD_mpio_comm_info_free\n"); -#endif FUNC_LEAVE_NOAPI(ret_value) } + #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5FDmpio.h b/src/H5FDmpio.h index 7bd2545..0500989 100644 --- a/src/H5FDmpio.h +++ b/src/H5FDmpio.h @@ -21,21 +21,12 @@ #ifndef H5FDmpio_H #define H5FDmpio_H -#include "H5FDpublic.h" -#include "H5Ipublic.h" - #ifdef H5_HAVE_PARALLEL # define H5FD_MPIO (H5FD_mpio_init()) #else # define H5FD_MPIO (-1) #endif /* H5_HAVE_PARALLEL */ -/* Type of I/O for data transfer properties */ -typedef enum H5FD_mpio_xfer_t { - H5FD_MPIO_INDEPENDENT = 0, /*zero is the default*/ - H5FD_MPIO_COLLECTIVE -} H5FD_mpio_xfer_t; - /* Macros */ #define IS_H5FD_MPIO(f) /* (H5F_t *f) */ \ @@ -59,14 +50,6 @@ H5_DLL herr_t H5Pget_fapl_mpio(hid_t fapl_id, MPI_Comm *comm/*out*/, MPI_Info *info/*out*/); H5_DLL herr_t H5Pset_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode); 
H5_DLL herr_t H5Pget_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode/*out*/); -H5_DLL MPI_Comm H5FD_mpio_communicator(H5FD_t *_file); -H5_DLL herr_t H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, - MPI_Datatype ftype); -H5_DLL herr_t H5FD_mpio_teardown(hid_t dxpl_id); -H5_DLL herr_t H5FD_mpio_wait_for_left_neighbor(H5FD_t *file); -H5_DLL herr_t H5FD_mpio_signal_right_neighbor(H5FD_t *file); -H5_DLL int H5FD_mpio_mpi_rank(H5FD_t *_file); -H5_DLL int H5FD_mpio_mpi_size(H5FD_t *_file); #ifdef __cplusplus } #endif diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 4dadb1b..c6263f5 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -41,7 +41,7 @@ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* File access */ #include "H5FDprivate.h" /* File drivers */ -#include "H5FDmpiposix.h" /* MPI/posix I/O file driver */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5Pprivate.h" /* Property lists */ @@ -189,6 +189,9 @@ static herr_t H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, static herr_t H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr, size_t size, const void *buf); static herr_t H5FD_mpiposix_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing); +static int H5FD_mpiposix_mpi_rank(const H5FD_t *_file); +static int H5FD_mpiposix_mpi_size(const H5FD_t *_file); +static MPI_Comm H5FD_mpiposix_communicator(const H5FD_t *_file); /* MPIPOSIX-specific file access properties */ typedef struct H5FD_mpiposix_fapl_t { @@ -197,7 +200,8 @@ typedef struct H5FD_mpiposix_fapl_t { } H5FD_mpiposix_fapl_t; /* The MPIPOSIX file driver information */ -static const H5FD_class_t H5FD_mpiposix_g = { +static const H5FD_class_mpi_t H5FD_mpiposix_g = { + { /* Start of superclass information */ "mpiposix", /*name */ MAXADDR, /*maxaddr */ H5F_CLOSE_SEMI, /* fc_degree */ @@ -227,19 +231,12 @@ static const 
H5FD_class_t H5FD_mpiposix_g = { NULL, /*lock */ NULL, /*unlock */ H5FD_FLMAP_SINGLE /*fl_map */ + }, /* End of superclass information */ + H5FD_mpiposix_mpi_rank, /*get_rank */ + H5FD_mpiposix_mpi_size, /*get_size */ + H5FD_mpiposix_communicator /*get_comm */ }; -#ifdef OLD_METADATA_WRITE -/* Global var to allow elimination of redundant metadata writes - * to be controlled by the value of an environment variable. */ -/* Use the elimination by default unless this is the Intel Red machine */ -#ifndef __PUMAGON__ -hbool_t H5_mpiposix_1_metawrite_g = TRUE; -#else -hbool_t H5_mpiposix_1_metawrite_g = FALSE; -#endif -#endif /* OLD_METADATA_WRITE */ - /* Interface initialization */ #define INTERFACE_INIT H5FD_mpiposix_init static int interface_initialize_g = 0; @@ -270,7 +267,7 @@ H5FD_mpiposix_init(void) FUNC_ENTER_NOAPI(H5FD_mpiposix_init, FAIL) if (H5I_VFL!=H5Iget_type(H5FD_MPIPOSIX_g)) - H5FD_MPIPOSIX_g = H5FDregister(&H5FD_mpiposix_g); + H5FD_MPIPOSIX_g = H5FD_register((const H5FD_class_t *)&H5FD_mpiposix_g,sizeof(H5FD_class_mpi_t)); /* Set return value */ ret_value=H5FD_MPIPOSIX_g; @@ -405,109 +402,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_mpiposix_communicator - * - * Purpose: Returns the MPI communicator for the file. 
- * - * Return: Success: The communicator - * - * Failure: NULL - * - * Programmer: Quincey Koziol - * Thursday, July 11, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -MPI_Comm -H5FD_mpiposix_communicator(H5FD_t *_file) -{ - H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file; - MPI_Comm ret_value; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpiposix_communicator, MPI_COMM_NULL) - - assert(file); - assert(H5FD_MPIPOSIX==file->pub.driver_id); - - /* Set return value */ - ret_value=file->comm; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_mpi_posix_communicator() */ - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpiposix_mpi_rank - * - * Purpose: Returns the MPI rank for a process - * - * Return: Success: non-negative - * Failure: negative - * - * Programmer: Quincey Koziol - * Thursday, July 11, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -int -H5FD_mpiposix_mpi_rank(H5FD_t *_file) -{ - H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file; - int ret_value; /* Return value */ - - FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_rank, FAIL) - - assert(file); - assert(H5FD_MPIPOSIX==file->pub.driver_id); - - /* Set return value */ - ret_value=file->mpi_rank; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_mpiposix_mpi_rank() */ - - -/*------------------------------------------------------------------------- - * Function: H5FD_mpiposix_mpi_size - * - * Purpose: Returns the number of MPI processes - * - * Return: Success: non-negative - * Failure: negative - * - * Programmer: Quincey Koziol - * Thursday, July 11, 2002 - * - * Modifications: - * - *------------------------------------------------------------------------- - */ -int -H5FD_mpiposix_mpi_size(H5FD_t *_file) -{ - H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file; - int ret_value; /* Return value */ - - 
FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_rank, FAIL) - - assert(file); - assert(H5FD_MPIPOSIX==file->pub.driver_id); - - /* Set return value */ - ret_value=file->mpi_size; - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_mpiposix_mpi_size() */ - - -/*------------------------------------------------------------------------- * Function: H5FD_mpiposix_fapl_get * * Purpose: Returns a file access property list which could be used to @@ -1310,12 +1204,9 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) - /* Only p<round> will do the actual write if all procs in comm write same metadata */ -#ifdef OLD_METADATA_WRITE - if (H5_mpiposix_1_metawrite_g) -#endif /* OLD_METADATA_WRITE */ - if (file->mpi_rank != H5_PAR_META_WRITE) - HGOTO_DONE(SUCCEED) /* skip the actual write */ + /* Only one process will do the actual write if all procs in comm write same metadata */ + if (file->mpi_rank != H5_PAR_META_WRITE) + HGOTO_DONE(SUCCEED) /* skip the actual write */ } /* end if */ #ifdef REPORT_IO @@ -1387,18 +1278,11 @@ done: } /* end if */ /* Guard against getting into metadata broadcast in failure cases */ else { - /* if only p<round> writes, need to broadcast the ret_value to other processes */ -#ifdef OLD_METADATA_WRITE - if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) { - if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) - } /* end if */ -#else /* OLD_METADATA_WRITE */ + /* when only one process writes, need to broadcast the ret_value to other processes */ if (type!=H5FD_MEM_DRAW) { if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ -#endif /* OLD_METADATA_WRITE */ } /* end else */ 
FUNC_LEAVE_NOAPI(ret_value) @@ -1478,5 +1362,108 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_mpiposix_flush() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_mpiposix_mpi_rank + * + * Purpose: Returns the MPI rank for a process + * + * Return: Success: non-negative + * Failure: negative + * + * Programmer: Quincey Koziol + * Thursday, July 11, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static int +H5FD_mpiposix_mpi_rank(const H5FD_t *_file) +{ + const H5FD_mpiposix_t *file = (const H5FD_mpiposix_t*)_file; + int ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_rank, FAIL) + + assert(file); + assert(H5FD_MPIPOSIX==file->pub.driver_id); + + /* Set return value */ + ret_value=file->mpi_rank; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpiposix_mpi_rank() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpiposix_mpi_size + * + * Purpose: Returns the number of MPI processes + * + * Return: Success: non-negative + * Failure: negative + * + * Programmer: Quincey Koziol + * Thursday, July 11, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static int +H5FD_mpiposix_mpi_size(const H5FD_t *_file) +{ + const H5FD_mpiposix_t *file = (const H5FD_mpiposix_t*)_file; + int ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_size, FAIL) + + assert(file); + assert(H5FD_MPIPOSIX==file->pub.driver_id); + + /* Set return value */ + ret_value=file->mpi_size; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpiposix_mpi_size() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_mpiposix_communicator + * + * Purpose: Returns the MPI communicator for the file. 
+ * + * Return: Success: The communicator + * + * Failure: NULL + * + * Programmer: Quincey Koziol + * Thursday, July 11, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static MPI_Comm +H5FD_mpiposix_communicator(const H5FD_t *_file) +{ + const H5FD_mpiposix_t *file = (const H5FD_mpiposix_t*)_file; + MPI_Comm ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(H5FD_mpiposix_communicator, MPI_COMM_NULL) + + assert(file); + assert(H5FD_MPIPOSIX==file->pub.driver_id); + + /* Set return value */ + ret_value=file->comm; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_posix_communicator() */ + #endif /*H5_HAVE_PARALLEL*/ diff --git a/src/H5FDmpiposix.h b/src/H5FDmpiposix.h index cae9370..4849a44 100644 --- a/src/H5FDmpiposix.h +++ b/src/H5FDmpiposix.h @@ -22,9 +22,6 @@ #ifndef __H5FDmpiposix_H #define __H5FDmpiposix_H -#include "H5FDpublic.h" -#include "H5Ipublic.h" - #ifdef H5_HAVE_PARALLEL # define H5FD_MPIPOSIX (H5FD_mpiposix_init()) #else @@ -46,10 +43,6 @@ extern "C" { H5_DLL hid_t H5FD_mpiposix_init(void); H5_DLL herr_t H5Pset_fapl_mpiposix(hid_t fapl_id, MPI_Comm comm, hbool_t use_gpfs); H5_DLL herr_t H5Pget_fapl_mpiposix(hid_t fapl_id, MPI_Comm *comm/*out*/, hbool_t *use_gpfs/*out*/); -H5_DLL MPI_Comm H5FD_mpiposix_communicator(H5FD_t *_file); -H5_DLL herr_t H5FD_mpiposix_closing(H5FD_t *file); -H5_DLL int H5FD_mpiposix_mpi_rank(H5FD_t *_file); -H5_DLL int H5FD_mpiposix_mpi_size(H5FD_t *_file); #ifdef __cplusplus } diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h index 2dacf25..064d337 100644 --- a/src/H5FDprivate.h +++ b/src/H5FDprivate.h @@ -25,19 +25,13 @@ /* Private headers needed by this file */ /* - * The MPIO, MPIPOSIX, & FPHDF5 drivers are needed because there are + * The MPI drivers are needed because there are * places where we check for things that aren't handled by these drivers. 
*/ -#include "H5FDfphdf5.h" -#include "H5FDmpio.h" -#include "H5FDmpiposix.h" +#include "H5FDmpi.h" /* MPI-based file drivers */ /* Macros */ -/* Single macro to check for all file drivers that use MPI */ -#define IS_H5FD_MPI(file) \ - (IS_H5FD_MPIO(file) || IS_H5FD_MPIPOSIX(file) || IS_H5FD_FPHDF5(file)) - /* Prototypes */ H5_DLL int H5FD_term_interface(void); H5_DLL H5FD_class_t *H5FD_get_class(hid_t id); @@ -51,6 +45,7 @@ H5_DLL herr_t H5FD_fapl_close(hid_t driver_id, void *fapl); H5_DLL herr_t H5FD_dxpl_open(struct H5P_genplist_t *plist, hid_t driver_id, const void *driver_info); H5_DLL herr_t H5FD_dxpl_copy(hid_t driver_id, const void *dxpl, void **copied_dxpl); H5_DLL herr_t H5FD_dxpl_close(hid_t driver_id, void *dxpl); +H5_DLL hid_t H5FD_register(const void *cls, size_t size); H5_DLL H5FD_t *H5FD_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr); H5_DLL herr_t H5FD_close(H5FD_t *file); diff --git a/src/H5FDsec2.c b/src/H5FDsec2.c index 5924c0a..7400c0a 100644 --- a/src/H5FDsec2.c +++ b/src/H5FDsec2.c @@ -225,7 +225,7 @@ H5FD_sec2_init(void) FUNC_ENTER_NOAPI(H5FD_sec2_init, FAIL) if (H5I_VFL!=H5I_get_type(H5FD_SEC2_g)) - H5FD_SEC2_g = H5FDregister(&H5FD_sec2_g); + H5FD_SEC2_g = H5FD_register(&H5FD_sec2_g,sizeof(H5FD_class_t)); /* Set return value */ ret_value=H5FD_SEC2_g; diff --git a/src/H5FDsrb.c b/src/H5FDsrb.c index 3b653f6..7f7b867 100644 --- a/src/H5FDsrb.c +++ b/src/H5FDsrb.c @@ -186,7 +186,7 @@ H5FD_srb_init(void) FUNC_ENTER_NOAPI(H5FD_srb_init, FAIL) if(H5I_VFL != H5Iget_type(H5FD_SRB_g)) - H5FD_SRB_g = H5FDregister(&H5FD_srb_g); + H5FD_SRB_g = H5FD_register(&H5FD_srb_g,sizeof(H5FD_class_t)); /* Set return value */ ret_value=H5FD_SRB_g; diff --git a/src/H5FDstream.c b/src/H5FDstream.c index 836528c..9492abd 100644 --- a/src/H5FDstream.c +++ b/src/H5FDstream.c @@ -230,7 +230,7 @@ hid_t H5FD_stream_init (void) FUNC_ENTER_NOAPI(H5FD_stream_init, FAIL) if (H5I_VFL != H5Iget_type (H5FD_STREAM_g)) { - H5FD_STREAM_g = H5FDregister 
(&H5FD_stream_g); + H5FD_STREAM_g = H5FD_register (&H5FD_stream_g,sizeof(H5FD_class_t)); /* set the process signal mask to ignore SIGPIPE signals */ /* NOTE: Windows doesn't know SIGPIPE signals that's why the #ifdef */ diff --git a/src/H5FPclient.c b/src/H5FPclient.c index e9f8ffb..434fa06 100644 --- a/src/H5FPclient.c +++ b/src/H5FPclient.c @@ -606,7 +606,7 @@ H5FP_request_close(H5FD_t *file, unsigned file_id, unsigned *req_id, req.req_type = H5FP_REQ_CLOSE; req.req_id = H5FP_gen_request_id(); req.file_id = file_id; - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank, H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) @@ -661,7 +661,7 @@ H5FP_request_allocate(H5FD_t *file, H5FD_mem_t mem_type, hsize_t size, req.req_type = H5FP_REQ_ALLOC; req.req_id = H5FP_gen_request_id(); req.file_id = H5FD_fphdf5_file_id(file); - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); req.mem_type = mem_type; req.meta_block_size = size; /* use this field as the size to allocate */ @@ -721,7 +721,7 @@ H5FP_request_free(H5FD_t *file, H5FD_mem_t mem_type, haddr_t addr, hsize_t size, req.req_type = H5FP_REQ_FREE; req.req_id = H5FP_gen_request_id(); req.file_id = H5FD_fphdf5_file_id(file); - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); req.mem_type = mem_type; req.addr = addr; req.meta_block_size = size; @@ -778,7 +778,7 @@ H5FP_request_get_eoa(H5FD_t *file, haddr_t *eoa, unsigned *req_id, H5FP_status_t req.req_type = H5FP_REQ_GET_EOA; req.req_id = H5FP_gen_request_id(); req.file_id = H5FD_fphdf5_file_id(file); - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank, H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) @@ -831,7 +831,7 @@ H5FP_request_set_eoa(H5FD_t *file, haddr_t eoa, unsigned *req_id, H5FP_status_t 
req.req_type = H5FP_REQ_SET_EOA; req.req_id = H5FP_gen_request_id(); req.file_id = H5FD_fphdf5_file_id(file); - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); req.addr = eoa; if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank, @@ -884,7 +884,7 @@ H5FP_request_update_eoma_eosda(H5FD_t *file, unsigned *req_id, H5FP_status_t *st req.req_type = H5FP_REQ_UPDATE_EOMA_EOSDA; req.req_id = H5FP_gen_request_id(); req.file_id = H5FD_fphdf5_file_id(file); - req.proc_rank = H5FD_fphdf5_mpi_rank(file); + req.proc_rank = H5FD_mpi_get_rank(file); if ((mrc = MPI_Send(&req, 1, H5FP_request, (int)H5FP_sap_rank, H5FP_TAG_REQUEST, H5FP_SAP_COMM)) != MPI_SUCCESS) diff --git a/src/H5FPserver.c b/src/H5FPserver.c index 13b22f9..1e2ca62 100644 --- a/src/H5FPserver.c +++ b/src/H5FPserver.c @@ -529,7 +529,7 @@ done: */ static herr_t H5FP_add_file_mod_to_list(H5FP_file_info *info, H5FD_mem_t mem_type, - haddr_t addr, unsigned rank, unsigned md_size, + haddr_t addr, unsigned md_size, char *metadata) { H5FP_mdata_mod *fm, mod; @@ -711,7 +711,7 @@ H5FP_add_new_file_info_to_list(unsigned file_id, haddr_t maxaddr, */ HDmemset(&info->file, 0, sizeof(info->file)); info->file.pub.driver_id = H5FD_FPHDF5; - info->file.pub.cls = &H5FD_fphdf5_g; + info->file.pub.cls = (const H5FD_class_t *)&H5FD_fphdf5_g; info->file.pub.maxaddr = maxaddr; info->file.pub.accum_loc = HADDR_UNDEF; info->file.pub.feature_flags = feature_flags; @@ -1384,7 +1384,7 @@ H5FP_sap_handle_write_request(H5FP_request_t *req, char *mdata, unsigned md_size } if (H5FP_add_file_mod_to_list(info, req->mem_type, (haddr_t)req->addr, - req->proc_rank, md_size, mdata) != SUCCEED) { + md_size, mdata) != SUCCEED) { exit_state = H5FP_STATUS_OOM; HGOTO_DONE(FAIL); } @@ -1632,7 +1632,9 @@ H5FP_sap_handle_get_eoa_request(H5FP_request_t *req) ret_value = FAIL; } +#ifdef LATER done: +#endif /* LATER */ if ((mrc = MPI_Send(&sap_eoa, 1, H5FP_eoa, (int)req->proc_rank, H5FP_TAG_EOA, H5FP_SAP_COMM)) != 
MPI_SUCCESS) HMPI_DONE_ERROR(FAIL, "MPI_Send failed", mrc); @@ -1653,7 +1655,6 @@ H5FP_sap_handle_set_eoa_request(H5FP_request_t *req) { H5FP_status_t exit_state = H5FP_STATUS_OK; H5FP_file_info *info; - int mrc; herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI_NOINIT_NOFUNC(H5FP_sap_handle_set_eoa_request); @@ -1667,7 +1668,9 @@ H5FP_sap_handle_set_eoa_request(H5FP_request_t *req) ret_value = FAIL; } +#ifdef LATER done: +#endif /* LATER */ H5FP_send_reply(req->proc_rank, req->req_id, req->file_id, exit_state); FUNC_LEAVE_NOAPI(ret_value); } diff --git a/src/H5Fcontig.c b/src/H5Fcontig.c index 32f1759..4e19a23 100644 --- a/src/H5Fcontig.c +++ b/src/H5Fcontig.c @@ -152,44 +152,18 @@ H5F_contig_fill(H5F_t *f, hid_t dxpl_id, struct H5O_layout_t *layout, #ifdef H5_HAVE_PARALLEL /* Retrieve MPI parameters */ - if(IS_H5FD_MPIO(f)) { + if(IS_H5FD_MPI(f)) { /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpio_communicator(f->shared->lf))) + if (MPI_COMM_NULL == (mpi_comm=H5FD_mpi_get_comm(f->shared->lf))) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + if ((mpi_rank=H5FD_mpi_get_rank(f->shared->lf))<0) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); /* Set the MPI-capable file driver flag */ using_mpi=1; } /* end if */ - else if(IS_H5FD_MPIPOSIX(f)) { - /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpiposix_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi=1; - } /* end if */ -#ifdef H5_HAVE_FPHDF5 - else if (IS_H5FD_FPHDF5(f)) { - /* Get the FPHDF5 barrier communicator */ - if (MPI_COMM_NULL == (mpi_comm = 
H5FD_fphdf5_barrier_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank = H5FD_fphdf5_mpi_rank(f->shared->lf)) < 0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi = 1; - } /* end if */ -#endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ /* Get the number of elements in the dataset's dataspace */ diff --git a/src/H5Fistore.c b/src/H5Fistore.c index 90a9c11..b726c68 100644 --- a/src/H5Fistore.c +++ b/src/H5Fistore.c @@ -2195,45 +2195,19 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get filter callback struct"); #ifdef H5_HAVE_PARALLEL - /* Retrieve up MPI parameters */ - if(IS_H5FD_MPIO(f)) { + /* Retrieve MPI parameters */ + if(IS_H5FD_MPI(f)) { /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpio_communicator(f->shared->lf))) + if (MPI_COMM_NULL == (mpi_comm=H5FD_mpi_get_comm(f->shared->lf))) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + if ((mpi_rank=H5FD_mpi_get_rank(f->shared->lf))<0) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); /* Set the MPI-capable file driver flag */ using_mpi=1; } /* end if */ - else if(IS_H5FD_MPIPOSIX(f)) { - /* Get the MPI communicator */ - if (MPI_COMM_NULL == (mpi_comm=H5FD_mpiposix_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi=1; - } /* end else */ -#ifdef H5_HAVE_FPHDF5 - else if (IS_H5FD_FPHDF5(f)) { - /* Get the FPHDF5 
barrier communicator */ - if (MPI_COMM_NULL == (mpi_comm = H5FD_fphdf5_barrier_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator"); - - /* Get the MPI rank & size */ - if ((mpi_rank = H5FD_fphdf5_mpi_rank(f->shared->lf)) < 0) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); - - /* Set the MPI-capable file driver flag */ - using_mpi = 1; - } /* end if */ -#endif /* H5_HAVE_FPHDF5 */ #endif /* H5_HAVE_PARALLEL */ /* diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index 2ef630a..59e2e48 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -186,14 +186,6 @@ struct H5F_t { H5F_mtab_t mtab; /* File mount table */ }; -#ifdef OLD_METADATA_WRITE -#ifdef H5_HAVE_PARALLEL -/* Whether a single process writes metadata */ -H5_DLLVAR hbool_t H5_mpi_1_metawrite_g; -H5_DLLVAR hbool_t H5_mpiposix_1_metawrite_g; -#endif /* H5_HAVE_PARALLEL */ -#endif /* OLD_METADATA_WRITE */ - /* Private functions, not part of the publicly documented API */ #ifdef NOT_YET H5_DLL void H5F_encode_length_unusual(const H5F_t *f, uint8_t **p, uint8_t *l); diff --git a/src/H5Smpio.c b/src/H5Smpio.c index b4d6cae..fca460f 100644 --- a/src/H5Smpio.c +++ b/src/H5Smpio.c @@ -681,7 +681,7 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout, size_t elmt_size, * Pass buf type, file type to the file driver. Request an MPI type * transfer (instead of an elementary byteblock transfer). 
*/ - if(H5FD_mpio_setup(dxpl_id, mpi_buf_type, mpi_file_type)<0) + if(H5FD_mpi_setup_collective(dxpl_id, mpi_buf_type, mpi_file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties"); plist_is_setup=1; @@ -700,7 +700,7 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout, size_t elmt_size, done: /* Reset the dxpl settings */ if(plist_is_setup) { - if(H5FD_mpio_teardown(dxpl_id)<0) + if(H5FD_mpi_teardown_collective(dxpl_id)<0) HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "unable to reset dxpl values"); } /* end if */ diff --git a/src/Makefile.in b/src/Makefile.in index c3eca2e..e0ade04 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -32,7 +32,7 @@ DISTCLEAN=libhdf5.settings ## Source and object files for the library (lexicographically)... LIB_SRC=H5.c H5A.c H5AC.c H5B.c H5D.c H5Dio.c H5E.c H5F.c H5Fcontig.c \ H5Fcompact.c H5Fdbg.c H5Fistore.c H5Fseq.c H5FD.c H5FDcore.c \ - H5FDfamily.c H5FDfphdf5.c H5FDgass.c H5FDlog.c H5FDmpio.c \ + H5FDfamily.c H5FDfphdf5.c H5FDgass.c H5FDlog.c H5FDmpi.c H5FDmpio.c \ H5FDmpiposix.c H5FDmulti.c H5FDsec2.c H5FDsrb.c H5FDstdio.c \ H5FDstream.c H5FL.c H5FO.c H5FP.c H5FPclient.c H5FPserver.c H5FS.c \ H5G.c H5Gent.c H5Gnode.c H5Gstab.c H5HG.c H5HGdbg.c H5HL.c H5HLdbg.c \ @@ -55,7 +55,7 @@ MOSTLYCLEAN=H5detect.o H5detect.lo H5detect H5Tinit.o H5Tinit.lo H5Tinit.c ## Public header files (to be installed)... 
PUB_HDR=H5public.h H5Apublic.h H5ACpublic.h H5Bpublic.h H5Dpublic.h \ H5Epubgen.h H5Epublic.h H5Fpublic.h H5FDpublic.h H5FDcore.h \ - H5FDfamily.h H5FDfphdf5.h H5FDgass.h H5FDlog.h H5FDmpio.h \ + H5FDfamily.h H5FDfphdf5.h H5FDgass.h H5FDlog.h H5FDmpi.h H5FDmpio.h \ H5FDmpiposix.h H5FDmulti.h H5FDsec2.h H5FDsrb.h H5FDstdio.h \ H5FDstream.h H5FPpublic.h H5Gpublic.h H5HGpublic.h H5HLpublic.h \ H5Ipublic.h H5MMpublic.h H5Opublic.h H5Ppublic.h H5Rpublic.h \ @@ -44,11 +44,9 @@ /* Predefined file drivers */ #include "H5FDcore.h" /* Files stored entirely in memory */ #include "H5FDfamily.h" /* File families */ -#include "H5FDfphdf5.h" /* Flexible Parallel HDF5 */ #include "H5FDgass.h" /* Remote files using GASS I/O */ #include "H5FDlog.h" /* sec2 driver with I/O logging (for debugging) */ -#include "H5FDmpio.h" /* Parallel files using MPI-2 I/O */ -#include "H5FDmpiposix.h" /* Parallel files using combination MPI-2 & posix I/O */ +#include "H5FDmpi.h" /* MPI-based file drivers */ #include "H5FDmulti.h" /* Usage-partitioned file family */ #include "H5FDsec2.h" /* POSIX unbuffered file I/O */ #include "H5FDsrb.h" /* Remote access using SRB */ |