summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5D.c38
-rw-r--r--src/H5Distore.c44
-rw-r--r--src/H5Dseq.c79
-rw-r--r--src/H5F.c5
-rw-r--r--src/H5FD.c2
-rw-r--r--src/H5FDmpio.c4
-rw-r--r--src/H5FDmpiposix.c1135
-rw-r--r--src/H5FDmpiposix.h62
-rw-r--r--src/H5Farray.c76
-rw-r--r--src/H5Fistore.c44
-rw-r--r--src/H5Fpkg.h1
-rw-r--r--src/H5Fseq.c79
-rw-r--r--src/Makefile.in14
-rw-r--r--src/hdf5.h11
14 files changed, 1427 insertions, 167 deletions
diff --git a/src/H5D.c b/src/H5D.c
index d125bd5..e3fb8e1 100644
--- a/src/H5D.c
+++ b/src/H5D.c
@@ -30,10 +30,12 @@
/*#define H5D_DEBUG*/
/*
- * The MPIO driver is needed because there are kludges in this file and
- * places where we check for things that aren't handled by this driver.
+ * The MPIO & MPIPOSIX drivers are needed because there are kludges in this
+ * file and places where we check for things that aren't handled by these
+ * drivers.
*/
#include "H5FDmpio.h"
+#include "H5FDmpiposix.h"
#ifdef H5_HAVE_PARALLEL
/* Remove this if H5R_DATASET_REGION is no longer used in this file */
@@ -1541,8 +1543,8 @@ H5D_create(H5G_entry_t *loc, const char *name, const H5T_t *type,
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "unable to locate insertion point");
#ifdef H5_HAVE_PARALLEL
- /* If MPIO is used, no filter support yet. */
- if(IS_H5FD_MPIO(f) && dcpl_pline.nfilters > 0)
+ /* If MPIO or MPIPOSIX is used, no filter support yet. */
+ if((IS_H5FD_MPIO(f) || IS_H5FD_MPIPOSIX(f)) && dcpl_pline.nfilters > 0)
HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, NULL, "Parallel I/O does not support filters yet");
#endif /*H5_HAVE_PARALLEL*/
@@ -2018,8 +2020,8 @@ H5D_open_oid(H5G_entry_t *ent)
HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, NULL, "can't set pipeline");
#ifdef H5_HAVE_PARALLEL
- /* If MPIO is used, no filter support yet. */
- if(IS_H5FD_MPIO(dataset->ent.file) && pline.nfilters > 0)
+ /* If MPIO or MPIPOSIX is used, no filter support yet. */
+ if((IS_H5FD_MPIO(dataset->ent.file) || IS_H5FD_MPIPOSIX(dataset->ent.file)) && pline.nfilters > 0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, NULL, "Parallel IO does not support filters yet");
#endif /*H5_HAVE_PARALLEL*/
@@ -2224,7 +2226,7 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
H5S_t *free_this_space=NULL; /*data space to free */
#ifdef H5_HAVE_PARALLEL
H5FD_mpio_dxpl_t *dx = NULL;
- H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */
+ H5FD_mpio_xfer_t xfer_mode=H5FD_MPIO_INDEPENDENT; /*xfer_mode for this request */
hbool_t xfer_mode_changed=0; /*xfer_mode needs restore */
hbool_t doing_mpio=0; /*This is an MPIO access */
#endif /*H5_HAVE_PARALLEL*/
@@ -2280,10 +2282,10 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
xfer_mode = dx->xfer_mode;
}
} /* end if */
- /* Collective access is not permissible without the MPIO driver */
+ /* Collective access is not permissible without the MPIO or MPIPOSIX driver */
if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE &&
- !(IS_H5FD_MPIO(dataset->ent.file)))
- HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPIO driver only");
+ !(IS_H5FD_MPIO(dataset->ent.file) || IS_H5FD_MPIPOSIX(dataset->ent.file)))
+ HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPIO & MPIPOSIX drivers only");
/* Set the "parallel I/O possible" flag, for H5S_find() */
if (H5S_mpi_opt_types_g && IS_H5FD_MPIO(dataset->ent.file)) {
@@ -2640,7 +2642,7 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
H5S_t *free_this_space=NULL; /*data space to free */
#ifdef H5_HAVE_PARALLEL
H5FD_mpio_dxpl_t *dx = NULL;
- H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */
+ H5FD_mpio_xfer_t xfer_mode=H5FD_MPIO_INDEPENDENT; /*xfer_mode for this request */
hbool_t xfer_mode_changed=0; /*xfer_mode needs restore */
hbool_t doing_mpio=0; /*This is an MPIO access */
#endif /*H5_HAVE_PARALLEL*/
@@ -2677,17 +2679,17 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list");
#ifdef H5_HAVE_PARALLEL
- /* If MPIO is used, no VL datatype support yet. */
+ /* If MPIO or MPIPOSIX is used, no VL datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
- if (IS_H5FD_MPIO(dataset->ent.file) && H5T_get_class(mem_type)==H5T_VLEN)
+ if ( (IS_H5FD_MPIO(dataset->ent.file) || IS_H5FD_MPIPOSIX(dataset->ent.file)) && H5T_get_class(mem_type)==H5T_VLEN)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet");
#endif /*H5_HAVE_PARALLEL*/
#ifdef H5_HAVE_PARALLEL
- /* If MPIO is used, no dataset region reference support yet. */
+ /* If MPIO or MPIPOSIX is used, no dataset region reference datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
- if (IS_H5FD_MPIO(dataset->ent.file) &&
+ if ((IS_H5FD_MPIO(dataset->ent.file) || IS_H5FD_MPIPOSIX(dataset->ent.file)) &&
H5T_get_class(mem_type)==H5T_REFERENCE &&
H5T_get_ref_type(mem_type)==H5R_DATASET_REGION)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet");
@@ -2715,9 +2717,9 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
xfer_mode = dx->xfer_mode;
}
} /* end if */
- /* Collective access is not permissible without the MPIO driver */
+ /* Collective access is not permissible without the MPIO or MPIPOSIX driver */
if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE &&
- !(IS_H5FD_MPIO(dataset->ent.file)))
+ !(IS_H5FD_MPIO(dataset->ent.file) || IS_H5FD_MPIPOSIX(dataset->ent.file)))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPIO driver only");
/* Set the "parallel I/O possible" flag, for H5S_find() */
@@ -3269,7 +3271,7 @@ H5D_init_storage(H5D_t *dset, const H5S_t *space)
* If the dataset is accessed via parallel I/O, allocate file space
* for all chunks now and initialize each chunk with the fill value.
*/
- if (IS_H5FD_MPIO(dset->ent.file)) {
+ if (IS_H5FD_MPIO(dset->ent.file) || IS_H5FD_MPIPOSIX(dset->ent.file)) {
/* We only handle simple data spaces so far */
int ndims;
hsize_t dim[H5O_LAYOUT_NDIMS];
diff --git a/src/H5Distore.c b/src/H5Distore.c
index a44ce5a..a9f7f56 100644
--- a/src/H5Distore.c
+++ b/src/H5Distore.c
@@ -45,8 +45,9 @@
#include "H5Sprivate.h" /* Dataspaces */
#include "H5Vprivate.h"
-/* MPIO driver needed for special checks */
+/* MPIO & MPIPOSIX drivers needed for special checks */
#include "H5FDmpio.h"
+#include "H5FDmpiposix.h"
/*
* Feature: If this constant is defined then every cache preemption and load
@@ -1781,12 +1782,12 @@ H5F_istore_read(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
/*
- * If MPIO is used and file can be written to, we must bypass the
+ * If MPIO or MPIPOSIX is used and file can be written to, we must bypass the
* chunk-cache scheme because other MPI processes could be writing to
* other elements in the same chunk.
* Do a direct write-through of only the elements requested.
*/
- || (IS_H5FD_MPIO(f) && (H5F_ACC_RDWR & f->shared->flags))
+ || ((IS_H5FD_MPIO(f) ||IS_H5FD_MPIPOSIX(f)) && (H5F_ACC_RDWR & f->shared->flags))
#endif /* H5_HAVE_PARALLEL */
) {
H5O_layout_t l; /* temporary layout */
@@ -1965,11 +1966,11 @@ H5F_istore_write(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
/*
- * If MPIO is used, must bypass the chunk-cache scheme because other
+ * If MPIO or MPIPOSIX is used, must bypass the chunk-cache scheme because other
* MPI processes could be writing to other elements in the same chunk.
* Do a direct write-through of only the elements requested.
*/
- || (IS_H5FD_MPIO(f) && (H5F_ACC_RDWR & f->shared->flags))
+ || ((IS_H5FD_MPIO(f) ||IS_H5FD_MPIPOSIX(f)) && (H5F_ACC_RDWR & f->shared->flags))
#endif /* H5_HAVE_PARALLEL */
) {
H5O_layout_t l; /* temporary layout */
@@ -2416,10 +2417,22 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
} /* end if */
/* Retrieve up MPI parameters */
- if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
- if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ if(IS_H5FD_MPIO(f)) {
+ if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
+ if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ } /* end if */
+ else {
+ /* Sanity Check */
+ assert(IS_H5FD_MPIPOSIX(f));
+
+ /* Get the MPI rank & size */
+ if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
+ if ((mpi_size=H5FD_mpiposix_mpi_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ } /* end else */
/* Loop over all chunks */
carry=0;
@@ -2469,8 +2482,17 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
* still writing out chunks and other processes race ahead to read
* them in, getting bogus data.
*/
- if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ if(IS_H5FD_MPIO(f)) {
+ if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ } /* end if */
+ else {
+ /* Sanity Check */
+ assert(IS_H5FD_MPIPOSIX(f));
+
+ if (MPI_Barrier(H5FD_mpiposix_communicator(f->shared->lf)))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ } /* end else */
} /* end if */
done:
diff --git a/src/H5Dseq.c b/src/H5Dseq.c
index 0a4da84..8f988ff 100644
--- a/src/H5Dseq.c
+++ b/src/H5Dseq.c
@@ -26,8 +26,9 @@
#include "H5Pprivate.h"
#include "H5Vprivate.h"
-/* MPIO driver functions are needed for some special checks */
+/* MPIO & MPIPOSIX driver functions are needed for some special checks */
#include "H5FDmpio.h"
+#include "H5FDmpiposix.h"
/* Interface initialization */
#define PABLO_MASK H5Fseq_mask
@@ -182,27 +183,29 @@ H5F_seq_readv(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
@@ -564,27 +567,29 @@ H5F_seq_writev(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
diff --git a/src/H5F.c b/src/H5F.c
index 1e6f3a7..68f82cf 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -22,6 +22,7 @@
#include "H5FDcore.h" /*temporary in-memory files */
#include "H5FDfamily.h" /*family of files */
#include "H5FDmpio.h" /*MPI-2 I/O */
+#include "H5FDmpiposix.h" /*MPI-2 & posix I/O */
#include "H5FDgass.h" /*GASS I/O */
#include "H5FDstream.h" /*in-memory files streamed via sockets */
#include "H5FDsrb.h" /*SRB I/O */
@@ -215,7 +216,7 @@ H5F_init_interface(void)
/* Allow MPI buf-and-file-type optimizations? */
const char *s = HDgetenv ("HDF5_MPI_1_METAWRITE");
if (s && HDisdigit(*s)) {
- H5_mpi_1_metawrite_g = (int)HDstrtol (s, NULL, 0);
+ H5_mpiposix_1_metawrite_g = H5_mpi_1_metawrite_g = (int)HDstrtol (s, NULL, 0);
}
}
#endif /* H5_HAVE_PARALLEL */
@@ -304,6 +305,7 @@ H5F_init_interface(void)
if ((status=H5FD_MULTI)<0) goto end_registration;
#ifdef H5_HAVE_PARALLEL
if ((status=H5FD_MPIO)<0) goto end_registration;
+ if ((status=H5FD_MPIPOSIX)<0) goto end_registration;
#endif /* H5_HAVE_PARALLEL */
#ifdef H5_HAVE_STREAM
if ((status=H5FD_STREAM)<0) goto end_registration;
@@ -2571,7 +2573,6 @@ H5F_close(H5F_t *f)
{
H5F_close_degree_t fc_degree; /* What action to take when closing the last file ID for a file */
hid_t *oid_list; /* List of IDs still open */
- unsigned oid_count; /* Number of IDs still open */
unsigned i; /* Local index variable */
unsigned closing=0; /* Indicate that the file will be closed */
herr_t ret_value = SUCCEED; /* Return value */
diff --git a/src/H5FD.c b/src/H5FD.c
index d905fdf..fb25da8 100644
--- a/src/H5FD.c
+++ b/src/H5FD.c
@@ -1611,7 +1611,7 @@ H5FD_real_alloc(H5FD_t *file, H5FD_mem_t type, hsize_t size)
}
} else {
hsize_t wasted;
- haddr_t oldeoa;
+ haddr_t oldeoa=0;
haddr_t eoa = (file->cls->get_eoa)(file);
#ifdef H5F_DEBUG
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 7f36862..df4b95c 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -72,9 +72,9 @@ static haddr_t H5FD_mpio_get_eoa(H5FD_t *_file);
static herr_t H5FD_mpio_set_eoa(H5FD_t *_file, haddr_t addr);
static haddr_t H5FD_mpio_get_eof(H5FD_t *_file);
static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
- size_t size, void *buf);
+ size_t size, void *buf);
static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
- size_t size, const void *buf);
+ size_t size, const void *buf);
static herr_t H5FD_mpio_flush(H5FD_t *_file, unsigned closing);
/* MPIO-specific file access properties */
diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c
new file mode 100644
index 0000000..82533d0
--- /dev/null
+++ b/src/H5FDmpiposix.c
@@ -0,0 +1,1135 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Quincey Koziol <koziol@ncsa.uiuc.ed>
+ * Thursday, July 11, 2002
+ *
+ * Purpose: This is a "combination" MPI-2 and posix I/O driver.
+ * It uses MPI for coodinating the actions of several processes
+ * and posix I/O calls to do the actual I/O to the disk.
+ *
+ * This driver was derived from the H5FDmpio.c driver and may
+ * share bugs/quirks/etc.
+ *
+ * Limitations:
+ * There is no "collective" I/O mode with this driver.
+ *
+ * This will almost certainly _not_ work correctly for files
+ * accessed on distributed parallel systems with the file located
+ * on a non-parallel filesystem.
+ *
+ */
+#include "H5private.h" /*library functions */
+#include "H5Eprivate.h" /*error handling */
+#include "H5Fprivate.h" /*files */
+#include "H5FDprivate.h" /*file driver */
+#include "H5FDmpiposix.h" /* MPI/posix I/O file driver */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /*memory allocation */
+#include "H5Pprivate.h" /*property lists */
+
+/*
+ * The driver identification number, initialized at runtime if H5_HAVE_PARALLEL
+ * is defined. This allows applications to still have the H5FD_MPIPOSIX
+ * "constants" in their source code (it also makes this file strictly ANSI
+ * compliant when H5_HAVE_PARALLEL isn't defined)
+ */
+static hid_t H5FD_MPIPOSIX_g = 0;
+
+#ifdef H5_HAVE_PARALLEL
+
+/* File operations */
+#define OP_UNKNOWN 0
+#define OP_READ 1
+#define OP_WRITE 2
+
+/*
+ * The description of a file belonging to this driver.
+ * The EOF value
+ * is only used just after the file is opened in order for the library to
+ * determine whether the file is empty, truncated, or okay. The MPIPOSIX driver
+ * doesn't bother to keep it updated since it's an expensive operation.
+ */
+typedef struct H5FD_mpiposix_t {
+ H5FD_t pub; /*public stuff, must be first */
+ int fd; /*the unix file handle */
+ MPI_Comm comm; /*communicator */
+ int mpi_rank; /* This process's rank */
+ int mpi_size; /* Total number of processes */
+ int mpi_round; /* Current round robin process (for metadata I/O) */
+ haddr_t eof; /*end-of-file marker */
+ haddr_t eoa; /*end-of-address marker */
+ haddr_t last_eoa; /* Last known end-of-address marker */
+ haddr_t pos; /* Current file I/O position */
+ int op; /* Last file I/O operation */
+#ifndef WIN32
+ /*
+ * On most systems the combination of device and i-node number uniquely
+ * identify a file.
+ */
+ dev_t device; /*file device number */
+ ino_t inode; /*file i-node number */
+#else
+ /*
+ * On WIN32 the low-order word of a unique identifier associated with the
+ * file and the volume serial number uniquely identify a file. This number
+ * (which, both? -rpm) may change when the system is restarted or when the
+ * file is opened. After a process opens a file, the identifier is
+ * constant until the file is closed. An application can use this
+ * identifier and the volume serial number to determine whether two
+ * handles refer to the same file.
+ */
+ int fileindexlo;
+ int fileindexhi;
+#endif
+} H5FD_mpiposix_t;
+
+/*
+ * This driver supports systems that have the lseek64() function by defining
+ * some macros here so we don't have to have conditional compilations later
+ * throughout the code.
+ *
+ * file_offset_t: The datatype for file offsets, the second argument of
+ * the lseek() or lseek64() call.
+ *
+ * file_seek: The function which adjusts the current file position,
+ * either lseek() or lseek64().
+ */
+/* adding for windows NT file system support. */
+/* pvn: added __MWERKS__ support. */
+
+#ifdef H5_HAVE_LSEEK64
+# define file_offset_t off64_t
+# define file_seek lseek64
+# define file_truncate ftruncate64
+#elif defined (WIN32) && !defined(__MWERKS__)
+# /*MSVC*/
+# define file_offset_t __int64
+# define file_seek _lseeki64
+# define file_truncate _ftruncatei64
+#else
+# define file_offset_t off_t
+# define file_seek HDlseek
+# define file_truncate HDftruncate
+#endif
+
+/*
+ * These macros check for overflow of various quantities. These macros
+ * assume that file_offset_t is signed and haddr_t and size_t are unsigned.
+ *
+ * ADDR_OVERFLOW: Checks whether a file address of type `haddr_t'
+ * is too large to be represented by the second argument
+ * of the file seek function.
+ *
+ * SIZE_OVERFLOW: Checks whether a buffer size of type `hsize_t' is too
+ * large to be represented by the `size_t' type.
+ *
+ * REGION_OVERFLOW: Checks whether an address and size pair describe data
+ * which can be addressed entirely by the second
+ * argument of the file seek function.
+ */
+#define MAXADDR (((haddr_t)1<<(8*sizeof(file_offset_t)-1))-1)
+#define ADDR_OVERFLOW(A) (HADDR_UNDEF==(A) || \
+ ((A) & ~(haddr_t)MAXADDR))
+#define SIZE_OVERFLOW(Z) ((Z) & ~(hsize_t)MAXADDR)
+#define REGION_OVERFLOW(A,Z) (ADDR_OVERFLOW(A) || SIZE_OVERFLOW(Z) || \
+ sizeof(file_offset_t)<sizeof(size_t) || \
+ HADDR_UNDEF==(A)+(Z) || \
+ (file_offset_t)((A)+(Z))<(file_offset_t)(A))
+
+/* Callbacks */
+static void *H5FD_mpiposix_fapl_get(H5FD_t *_file);
+static H5FD_t *H5FD_mpiposix_open(const char *name, unsigned flags, hid_t fapl_id,
+ haddr_t maxaddr);
+static herr_t H5FD_mpiposix_close(H5FD_t *_file);
+static int H5FD_mpiposix_cmp(const H5FD_t *_f1, const H5FD_t *_f2);
+static herr_t H5FD_mpiposix_query(const H5FD_t *_f1, unsigned long *flags);
+static haddr_t H5FD_mpiposix_get_eoa(H5FD_t *_file);
+static herr_t H5FD_mpiposix_set_eoa(H5FD_t *_file, haddr_t addr);
+static haddr_t H5FD_mpiposix_get_eof(H5FD_t *_file);
+static herr_t H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
+ size_t size, void *buf);
+static herr_t H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
+ size_t size, const void *buf);
+static herr_t H5FD_mpiposix_flush(H5FD_t *_file, unsigned closing);
+
+/* MPIPOSIX-specific file access properties */
+typedef struct H5FD_mpiposix_fapl_t {
+ MPI_Comm comm; /*communicator */
+} H5FD_mpiposix_fapl_t;
+
+/* The MPIPOSIX file driver information */
+static const H5FD_class_t H5FD_mpiposix_g = {
+ "mpiposix", /*name */
+ MAXADDR, /*maxaddr */
+ H5F_CLOSE_SEMI, /* fc_degree */
+ NULL, /*sb_size */
+ NULL, /*sb_encode */
+ NULL, /*sb_decode */
+ sizeof(H5FD_mpiposix_fapl_t), /*fapl_size */
+ H5FD_mpiposix_fapl_get, /*fapl_get */
+ NULL, /*fapl_copy */
+ NULL, /*fapl_free */
+ 0, /*dxpl_size */
+ NULL, /*dxpl_copy */
+ NULL, /*dxpl_free */
+ H5FD_mpiposix_open, /*open */
+ H5FD_mpiposix_close, /*close */
+ H5FD_mpiposix_cmp, /*cmp */
+ H5FD_mpiposix_query, /*query */
+ NULL, /*alloc */
+ NULL, /*free */
+ H5FD_mpiposix_get_eoa, /*get_eoa */
+ H5FD_mpiposix_set_eoa, /*set_eoa */
+ H5FD_mpiposix_get_eof, /*get_eof */
+ H5FD_mpiposix_read, /*read */
+ H5FD_mpiposix_write, /*write */
+ H5FD_mpiposix_flush, /*flush */
+ H5FD_FLMAP_SINGLE, /*fl_map */
+};
+
+/* Global var to allow elimination of redundant metadata writes
+ * to be controlled by the value of an environment variable. */
+/* Use the elimination by default unless this is the Intel Red machine */
+#ifndef __PUMAGON__
+hbool_t H5_mpiposix_1_metawrite_g = TRUE;
+#else
+hbool_t H5_mpiposix_1_metawrite_g = FALSE;
+#endif
+
+/* Interface initialization */
+#define PABLO_MASK H5FD_mpiposix_mask
+#define INTERFACE_INIT H5FD_mpiposix_init
+static int interface_initialize_g = 0;
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_init
+ *
+ * Purpose: Initialize this driver by registering the driver with the
+ * library.
+ *
+ * Return: Success: The driver ID for the mpiposix driver.
+ *
+ * Failure: Negative.
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+hid_t
+H5FD_mpiposix_init(void)
+{
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_init, FAIL);
+
+ if (H5I_VFL!=H5Iget_type(H5FD_MPIPOSIX_g))
+ H5FD_MPIPOSIX_g = H5FDregister(&H5FD_mpiposix_g);
+
+ FUNC_LEAVE(H5FD_MPIPOSIX_g);
+} /* end H5FD_mpiposix_init() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_fapl_mpiposix
+ *
+ * Purpose: Store the user supplied MPI communicator COMM in
+ * the file access property list FAPL_ID which can then be used
+ * to create and/or open the file. This function is available
+ * only in the parallel HDF5 library and is not collective.
+ *
+ * COMM is the MPI communicator to be used for file open as
+ * defined in MPI_FILE_OPEN of MPI-2. This function does not
+ * make a duplicated communicator. Any modification to COMM
+ * after this function call returns may have undetermined effect
+ * on the access property list. Users should not modify the
+ * communicator while it is defined in a property list.
+ *
+ * Return: Success: Non-negative
+ * Failure: Negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_fapl_mpiposix(hid_t fapl_id, MPI_Comm comm)
+{
+ H5FD_mpiposix_fapl_t fa;
+ H5P_genplist_t *plist; /* Property list pointer */
+ herr_t ret_value=FAIL;
+
+ FUNC_ENTER_API(H5Pset_fapl_mpiposix, FAIL);
+ H5TRACE2("e","iMc",fapl_id,comm);
+
+ /* Check arguments */
+ if(TRUE!=H5P_isa_class(fapl_id,H5P_FILE_ACCESS) || NULL == (plist = H5I_object(fapl_id)))
+ HRETURN_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
+#ifdef LATER
+#warning "We need to verify that COMM contains sensible information."
+#endif
+
+ /* Initialize driver specific properties */
+ fa.comm = comm;
+
+ ret_value= H5P_set_driver(plist, H5FD_MPIPOSIX, &fa);
+
+ FUNC_LEAVE(ret_value);
+} /* end H5Pset_fapl_mpiposix() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_fapl_mpiposix
+ *
+ * Purpose: If the file access property list is set to the H5FD_MPIPOSIX
+ * driver then this function returns the MPI communicator and
+ * information through the COMM pointer.
+ *
+ * Return: Success: Non-negative with the communicator and
+ * information returned through the COMM
+ * argument if non-null. This piece of
+ * information is copied and is therefore
+ * valid only until the file access property
+ * list is modified or closed.
+ *
+ * Failure: Negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_fapl_mpiposix(hid_t fapl_id, MPI_Comm *comm/*out*/)
+{
+ H5FD_mpiposix_fapl_t *fa;
+ H5P_genplist_t *plist; /* Property list pointer */
+
+ FUNC_ENTER_API(H5Pget_fapl_mpiposix, FAIL);
+ H5TRACE2("e","ix",fapl_id,comm);
+
+ if(TRUE!=H5P_isa_class(fapl_id,H5P_FILE_ACCESS) || NULL == (plist = H5I_object(fapl_id)))
+ HRETURN_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
+ if (H5FD_MPIPOSIX!=H5P_get_driver(plist))
+ HRETURN_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver");
+ if (NULL==(fa=H5P_get_driver_info(plist)))
+ HRETURN_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info");
+
+ /* Get MPI Communicator */
+ if (comm)
+ *comm = fa->comm;
+
+ FUNC_LEAVE(SUCCEED);
+} /* end H5Pget_fapl_mpiposix() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_communicator
+ *
+ * Purpose: Returns the MPI communicator for the file.
+ *
+ * Return: Success: The communicator
+ *
+ * Failure: NULL
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+MPI_Comm
+H5FD_mpiposix_communicator(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_communicator, MPI_COMM_NULL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ FUNC_LEAVE(file->comm);
+} /* end H5FD_mpi_posix_communicator() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_mpi_rank
+ *
+ * Purpose: Returns the MPI rank for a process
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+int
+H5FD_mpiposix_mpi_rank(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_rank, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ FUNC_LEAVE(file->mpi_rank);
+} /* end H5FD_mpiposix_mpi_rank() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_mpi_size
+ *
+ * Purpose: Returns the number of MPI processes
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+int
+H5FD_mpiposix_mpi_size(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_mpi_rank, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ FUNC_LEAVE(file->mpi_size);
+} /* end H5FD_mpiposix_mpi_size() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_fapl_get
+ *
+ * Purpose: Returns a file access property list which could be used to
+ * create another file the same as this one.
+ *
+ * Return: Success: Ptr to new file access property list with all
+ * fields copied from the file pointer.
+ *
+ * Failure: NULL
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_mpiposix_fapl_get(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+ H5FD_mpiposix_fapl_t *fa = NULL;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_fapl_get, NULL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ if (NULL==(fa=H5MM_calloc(sizeof(H5FD_mpiposix_fapl_t))))
+ HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
+
+ /* These should be copied. --QAK, 2002-07-11 */
+ fa->comm = file->comm;
+
+ FUNC_LEAVE(fa);
+} /* end H5FD_mpiposix_fapl_get() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_open
+ *
+ * Purpose: Opens a file with name NAME. The FLAGS are a bit field with
+ * purpose similar to the second argument of open(2) and which
+ * are defined in H5Fpublic.h. The file access property list
+ * FAPL_ID contains the properties driver properties and MAXADDR
+ * is the largest address which this file will be expected to
+ * access. This is collective.
+ *
+ * Return: Success: A new file pointer.
+ * Failure: NULL
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static H5FD_t *
+H5FD_mpiposix_open(const char *name, unsigned flags, hid_t fapl_id,
+ haddr_t maxaddr)
+{
+ H5FD_mpiposix_t *file=NULL; /* New MPIPOSIX file struct */
+ int o_flags; /* Flags for file open call */
+ int fd=(-1); /* File handle for file opened */
+ int mpi_rank; /* MPI rank of this process */
+ int mpi_size; /* Total number of MPI processes */
+ int mpi_code; /* mpi return code */
+ const H5FD_mpiposix_fapl_t *fa=NULL; /* MPIPOSIX file access property list information */
+ H5FD_mpiposix_fapl_t _fa; /* Private copy of default file access property list information */
+ H5P_genplist_t *plist; /* Property list pointer */
+ h5_stat_t sb; /* Portable 'stat' struct */
+#ifdef WIN32
+ HFILE filehandle;
+ struct _BY_HANDLE_FILE_INFORMATION fileinfo;
+ int results;
+#endif
+ H5FD_t *ret_value=NULL; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_open, NULL);
+
+ /* Check arguments */
+ if (!name || !*name)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "invalid file name");
+ if (0==maxaddr || HADDR_UNDEF==maxaddr)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADRANGE, NULL, "bogus maxaddr");
+ if (ADDR_OVERFLOW(maxaddr))
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, NULL, "bogus maxaddr");
+
+ /* Obtain a pointer to mpiposix-specific file access properties */
+ if(TRUE!=H5P_isa_class(fapl_id,H5P_FILE_ACCESS) || NULL == (plist = H5I_object(fapl_id)))
+ HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list");
+ if (H5P_DEFAULT==fapl_id || H5FD_MPIPOSIX!=H5P_get_driver(plist)) {
+ _fa.comm = MPI_COMM_SELF; /*default*/
+ fa = &_fa;
+ } /* end if */
+ else {
+ fa = H5P_get_driver_info(plist);
+ assert(fa);
+ } /* end else */
+
+ /* Get the MPI rank of this process and the total number of processes */
+ if (MPI_SUCCESS != (mpi_code=MPI_Comm_rank (fa->comm, &mpi_rank)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Comm_rank failed", mpi_code);
+ if (MPI_SUCCESS != (mpi_code=MPI_Comm_size (fa->comm, &mpi_size)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Comm_size failed", mpi_code);
+
+ /* Build the open flags */
+ o_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY;
+
+ /* Only set the creation flag(s) for process 0 */
+ if(mpi_rank==0) {
+ if (H5F_ACC_TRUNC & flags)
+ o_flags |= O_TRUNC;
+ if (H5F_ACC_CREAT & flags)
+ o_flags |= O_CREAT;
+ if (H5F_ACC_EXCL & flags)
+ o_flags |= O_EXCL;
+ } /* end if */
+
+ /* Process 0 opens (or creates) the file while the rest of the
+ * processes wait. Then process 0 signals the other processes and they
+ * open (never create) the file and all processes proceed.
+ */
+ /* Process 0 opens (or creates) file and broadcasts result to other processes */
+ if(mpi_rank==0) {
+ /* Open the file */
+ fd=HDopen(name, o_flags, 0666);
+ } /* end if */
+
+ /* Broadcast the results of the open() from process 0 */
+ /* This is necessary because of the "tentative open" code in H5F_open()
+ * where the file is attempted to be opened with different flags from the
+ * user's, in order to check for the file's existence, etc. Here, process 0
+ * gets different flags from the other processes (since it is in charge of
+ * creating the file, if necessary) and can fail in situations where the
+ * other process's file opens would succeed, so allow the other processes
+ * to check for that situation and bail out now also. - QAK
+ */
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&fd, sizeof(int), MPI_BYTE, 0, fa->comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
+
+ /* If the file open on process 0 failed, bail out on all processes now */
+ if(fd<0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file");
+
+ /* Other processes (non 0) wait for broadcast result from process 0 and then open file */
+ if(mpi_rank!=0) {
+ /* Open the file */
+ if ((fd=HDopen(name, o_flags, 0666))<0)
+ HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file");
+ } /* end if */
+
+ /* Process 0 fstat()s the file and broadcasts the results to the other processes */
+ if(mpi_rank==0) {
+ /* Get the stat information */
+ if (HDfstat(fd, &sb)<0)
+ HGOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
+ } /* end if */
+
+ /* Broadcast the results of the fstat() from process 0 */
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&sb, sizeof(h5_stat_t), MPI_BYTE, 0, fa->comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
+
+ /* Build the file struct and initialize it */
+ if (NULL==(file=H5MM_calloc(sizeof(H5FD_mpiposix_t))))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
+
+ /* Set the general file information */
+ file->fd = fd;
+ file->eof = sb.st_size;
+
+ /* Set the MPI information */
+ file->comm = fa->comm;
+ file->mpi_rank = mpi_rank;
+ file->mpi_size = mpi_size;
+ file->mpi_round = 0; /* Start metadata writes with process 0 */
+
+ /* Reset the last file I/O operation */
+ file->pos = HADDR_UNDEF;
+ file->op = OP_UNKNOWN;
+
+ /* Set the information for the file's device and inode */
+#ifdef WIN32
+ filehandle = _get_osfhandle(fd);
+ results = GetFileInformationByHandle((HANDLE)filehandle, &fileinfo);
+ file->fileindexhi = fileinfo.nFileIndexHigh;
+ file->fileindexlo = fileinfo.nFileIndexLow;
+#else
+ file->device = sb.st_dev;
+ file->inode = sb.st_ino;
+#endif
+
+ /* Indicate success */
+ ret_value=(H5FD_t *)file;
+
+done:
+ /* Error cleanup */
+ if(ret_value==NULL) {
+ /* Close the file if it was left open */
+ if(fd!=(-1))
+ HDclose(fd);
+ } /* end if */
+
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_open() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_close
+ *
+ * Purpose: Closes a file.
+ *
+ * Return: Success: Non-negative
+ * Failure: Negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_close(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_close, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ /* Close the unix file */
+ if (HDclose(file->fd)<0)
+ HRETURN_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close file");
+
+ H5MM_xfree(file);
+
+ FUNC_LEAVE(SUCCEED);
+} /* end H5FD_mpiposix_close() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_cmp
+ *
+ * Purpose: Compares two files belonging to this driver using an
+ * arbitrary (but consistent) ordering.
+ *
+ * Return: Success: A value like strcmp()
+ * Failure: never fails (arguments were checked by the
+ * caller).
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5FD_mpiposix_cmp(const H5FD_t *_f1, const H5FD_t *_f2)
+{
+ const H5FD_mpiposix_t *f1 = (const H5FD_mpiposix_t*)_f1;
+ const H5FD_mpiposix_t *f2 = (const H5FD_mpiposix_t*)_f2;
+ int ret_value=0;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_cmp, H5FD_VFD_DEFAULT);
+
+#ifdef WIN32
+ if (f1->fileindexhi < f2->fileindexhi) ret_value= -1;
+ if (f1->fileindexhi > f2->fileindexhi) ret_value= 1;
+
+ if (f1->fileindexlo < f2->fileindexlo) ret_value= -1;
+ if (f1->fileindexlo > f2->fileindexlo) ret_value= 1;
+
+#else
+ if (f1->device < f2->device) ret_value= -1;
+ if (f1->device > f2->device) ret_value= 1;
+
+ if (f1->inode < f2->inode) ret_value= -1;
+ if (f1->inode > f2->inode) ret_value= 1;
+#endif
+
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_cmp() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_query
+ *
+ * Purpose: Set the flags that this VFL driver is capable of supporting.
+ * (listed in H5FDpublic.h)
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */)
+{
+ herr_t ret_value=SUCCEED;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_query, FAIL);
+
+ /* Set the VFL feature flags that this driver supports */
+ if(flags) {
+ *flags=0;
+ *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
+
+ /* Distinguish between updating the metadata accumulator on writes and
+ * reads. This is particularly (perhaps only, even) important for MPI-I/O
+ * where we guarantee that writes are collective, but reads may not be.
+ * If we were to allow the metadata accumulator to be written during a
+ * read operation, the application would hang.
+ */
+ *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
+
+ *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
+ } /* end if */
+
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_query() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_get_eoa
+ *
+ * Purpose: Gets the end-of-address marker for the file. The EOA marker
+ * is the first address past the last byte allocated in the
+ * format address space.
+ *
+ * Return: Success: The end-of-address marker.
+ * Failure: HADDR_UNDEF
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_mpiposix_get_eoa(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_get_eoa, HADDR_UNDEF);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ FUNC_LEAVE(file->eoa);
+} /* end H5FD_mpiposix_get_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_set_eoa
+ *
+ * Purpose: Set the end-of-address marker for the file. This function is
+ * called shortly after an existing HDF5 file is opened in order
+ * to tell the driver where the end of the HDF5 data is located.
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_set_eoa(H5FD_t *_file, haddr_t addr)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_set_eoa, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ file->eoa = addr;
+
+ FUNC_LEAVE(SUCCEED);
+} /* end H5FD_mpi_posix_set_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_get_eof
+ *
+ * Purpose: Gets the end-of-file marker for the file. The EOF marker
+ * is the real size of the file.
+ *
+ * The MPIPOSIX driver doesn't bother keeping this field updated
+ * since that's a relatively expensive operation. Fortunately
+ * the library only needs the EOF just after the file is opened
+ * in order to determine whether the file is empty, truncated,
+ * or okay.
+ *
+ * Return: Success: The end-of-address marker.
+ * Failure: HADDR_UNDEF
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_mpiposix_get_eof(H5FD_t *_file)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_get_eof, HADDR_UNDEF);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ FUNC_LEAVE(MAX(file->eof,file->eoa));
+} /* end H5FD_mpiposix_get_eof() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_read
+ *
+ * Purpose: Reads SIZE bytes of data from FILE beginning at address ADDR
+ * into buffer BUF according to data transfer properties in
+ * DXPL_ID using potentially complex file and buffer types to
+ * effect the transfer.
+ *
+ * Reading past the end of the file returns zeros instead of
+ * failing.
+ *
+ * Return: Success: Non-negative. Result is stored in caller-supplied
+ * buffer BUF.
+ * Failure: Negative, Contents of buffer BUF are undefined.
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id, haddr_t addr, size_t size,
+ void *buf/*out*/)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+ ssize_t nbytes; /* Number of bytes read each I/O call */
+ herr_t ret_value=SUCCEED;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_read, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+ assert(buf);
+
+ /* Check for overflow conditions */
+ if (HADDR_UNDEF==addr)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "addr undefined");
+ if (REGION_OVERFLOW(addr, size))
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow");
+ if (addr+size>file->eoa)
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow");
+
+ /* Seek to the correct location */
+ if ((addr!=file->pos || OP_READ!=file->op) &&
+ file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0)
+ HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL, "unable to seek to proper position");
+
+ /*
+ * Read data, being careful of interrupted system calls, partial results,
+ * and the end of the file.
+ */
+ while (size>0) {
+ do {
+ nbytes = HDread(file->fd, buf, size);
+ } while (-1==nbytes && EINTR==errno);
+ if (-1==nbytes)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
+ if (0==nbytes) {
+ /* end of file but not end of format address space */
+ HDmemset(buf, 0, size);
+ size = 0;
+ } /* end if */
+ assert(nbytes>=0);
+ assert((size_t)nbytes<=size);
+ size -= nbytes;
+ addr += (haddr_t)nbytes;
+ buf = (char*)buf + nbytes;
+ }
+
+ /* Update current position */
+ file->pos = addr;
+ file->op = OP_READ;
+
+done:
+ /* Check for error */
+ if(ret_value<0) {
+ /* Reset last file I/O information */
+ file->pos = HADDR_UNDEF;
+ file->op = OP_UNKNOWN;
+ } /* end if */
+
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_read() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_write
+ *
+ * Purpose: Writes SIZE bytes of data to FILE beginning at address ADDR
+ * from buffer BUF according to data transfer properties in
+ * DXPL_ID using potentially complex file and buffer types to
+ * effect the transfer.
+ *
+ * Return: Success: non-negative
+ * Failure: negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t UNUSED dxpl_id, haddr_t addr,
+ size_t size, const void *buf)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+ int mpi_code; /* MPI return code */
+ ssize_t nbytes; /* Number of bytes written each I/O call */
+ herr_t ret_value=SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_write, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+ assert(buf);
+
+ /* Check for overflow conditions */
+ if (HADDR_UNDEF==addr)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "addr undefined");
+ if (REGION_OVERFLOW(addr, size))
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow");
+ if (addr+size>file->eoa)
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow");
+
+ /* Only p<round> will do the actual write if all procs in comm write same data */
+ if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) {
+ if (file->mpi_rank != file->mpi_round)
+ HGOTO_DONE(SUCCEED) /* skip the actual write */
+ } /* end if */
+
+ /* Seek to the correct location */
+ if ((addr!=file->pos || OP_WRITE!=file->op) &&
+ file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0)
+ HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL,
+ "unable to seek to proper position");
+
+ /*
+ * Write the data, being careful of interrupted system calls and partial
+ * results
+ */
+ while (size>0) {
+ do {
+ nbytes = HDwrite(file->fd, buf, size);
+ } while (-1==nbytes && EINTR==errno);
+ if (-1==nbytes)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed");
+ assert(nbytes>0);
+ assert((size_t)nbytes<=size);
+ size -= nbytes;
+ addr += (haddr_t)nbytes;
+ buf = (const char*)buf + nbytes;
+ } /* end while */
+
+ /* Update current last file I/O information */
+ file->pos = addr;
+ file->op = OP_WRITE;
+
+done:
+ /* Check for error */
+ if(ret_value<0) {
+ /* Reset last file I/O information */
+ file->pos = HADDR_UNDEF;
+ file->op = OP_UNKNOWN;
+ } /* end if */
+ /* Guard against getting into metadata broadcast in failure cases */
+ else {
+ /* if only p<round> writes, need to broadcast the ret_value to other processes */
+ if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) {
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, file->mpi_round, file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code);
+
+ /* Round-robin rotate to the next process */
+ file->mpi_round = (++file->mpi_round)%file->mpi_size;
+#ifdef QAK
+ {
+ int max,min;
+
+ MPI_Allreduce(&file->mpi_round, &max, 1, MPI_INT, MPI_MAX, file->comm);
+ MPI_Allreduce(&file->mpi_round, &min, 1, MPI_INT, MPI_MIN, file->comm);
+ if(max!=file->mpi_round)
+ printf("%s: rank=%d, round=%d, max=%d\n",FUNC,file->mpi_rank,file->mpi_round,max);
+ if(min!=file->mpi_round)
+ printf("%s: rank=%d, round=%d, min=%d\n",FUNC,file->mpi_rank,file->mpi_round,min);
+ }
+#endif /* QAK */
+ } /* end if */
+ } /* end else */
+
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_write() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_mpiposix_flush
+ *
+ * Purpose: Makes sure that all data is on disk. This is collective.
+ *
+ * Return: Success: Non-negative
+ * Failure: Negative
+ *
+ * Programmer: Quincey Koziol
+ * Thursday, July 11, 2002
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_mpiposix_flush(H5FD_t *_file, unsigned UNUSED closing)
+{
+ H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
+#ifdef WIN32
+ HFILE filehandle; /* Windows file handle */
+ LARGE_INTEGER li; /* 64-bit integer for SetFilePointer() call */
+#endif /* WIN32 */
+ int mpi_code; /* MPI return code */
+ herr_t ret_value=SUCCEED;
+
+ FUNC_ENTER_NOAPI(H5FD_mpiposix_flush, FAIL);
+
+ assert(file);
+ assert(H5FD_MPIPOSIX==file->pub.driver_id);
+
+ /* Extend the file to make sure it's large enough */
+ if(file->eoa>file->last_eoa) {
+ /* Use the round-robin process to truncate (extend) the file */
+ if(file->mpi_rank == file->mpi_round) {
+#ifdef WIN32
+ /* Map the posix file handle to a Windows file handle */
+ filehandle = _get_osfhandle(fd);
+
+ /* Translate 64-bit integers into form Windows wants */
+ /* [This algorithm is from the Windows documentation for SetFilePointer()] */
+ li.QuadPart = file->eoa;
+ SetFilePointer((HANDLE)filehandle,li.LowPart,&li.HighPart,FILE_BEGIN);
+ if(SetEndOfFile((HANDLE)filehandle)==0)
+ HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL, "unable to extend file properly");
+#else /* WIN32 */
+ if(-1==file_truncate(file->fd, (file_offset_t)file->eoa))
+ HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL, "unable to extend file properly");
+#endif /* WIN32 */
+ } /* end if */
+
+ /* Don't let any proc return until all have extended the file.
+ * (Prevents race condition where some processes go ahead and write
+ * more data to the file before all the processes have finished making
+ * it the shorter length, potentially truncating the file and dropping
+ * the new data written)
+ */
+ if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
+
+ /* Update the 'last' eoa and eof values */
+ file->last_eoa=file->eoa;
+ file->eof = file->eoa;
+
+ /* Reset last file I/O information */
+ file->pos = HADDR_UNDEF;
+ file->op = OP_UNKNOWN;
+ } /* end if */
+
+done:
+ FUNC_LEAVE(ret_value);
+} /* end H5FD_mpiposix_flush() */
+
+#endif /*H5_HAVE_PARALLEL*/
+
diff --git a/src/H5FDmpiposix.h b/src/H5FDmpiposix.h
new file mode 100644
index 0000000..81d05d4
--- /dev/null
+++ b/src/H5FDmpiposix.h
@@ -0,0 +1,62 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by the Board of Trustees of the University of Illinois. *
+ * All rights reserved. *
+ * *
+ * This file is part of HDF5. The full HDF5 copyright notice, including *
+ * terms governing use, modification, and redistribution, is contained in *
+ * the files COPYING and Copyright.html. COPYING can be found at the root *
+ * of the source code distribution tree; Copyright.html can be found at the *
+ * root level of an installed copy of the electronic HDF5 document set and *
+ * is linked from the top-level documents page. It can also be found at *
+ * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have *
+ * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Quincey Koziol <koziol@ncsa.uiuc.edu>
+ * Thursday, July 11, 2002
+ *
+ * Purpose: The public header file for the mpiposix driver.
+ */
+
+#ifndef __H5FDmpiposix_H
+#define __H5FDmpiposix_H
+
+#include "H5FDpublic.h"
+#include "H5Ipublic.h"
+
+#ifdef H5_HAVE_PARALLEL
+# define H5FD_MPIPOSIX (H5FD_mpiposix_init())
+#else
+# define H5FD_MPIPOSIX (-1)
+#endif
+
+#ifdef H5_HAVE_PARALLEL
+
+/* Macros */
+
+#define IS_H5FD_MPIPOSIX(f) /* (H5F_t *f) */ \
+ (H5FD_MPIPOSIX==H5F_get_driver_id(f))
+
+/* Function prototypes */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+__DLL__ hid_t H5FD_mpiposix_init(void);
+__DLL__ herr_t H5Pset_fapl_mpiposix(hid_t fapl_id, MPI_Comm comm);
+__DLL__ herr_t H5Pget_fapl_mpiposix(hid_t fapl_id, MPI_Comm *comm/*out*/);
+__DLL__ MPI_Comm H5FD_mpiposix_communicator(H5FD_t *_file);
+__DLL__ herr_t H5FD_mpiposix_closing(H5FD_t *file);
+__DLL__ int H5FD_mpiposix_mpi_rank(H5FD_t *_file);
+__DLL__ int H5FD_mpiposix_mpi_size(H5FD_t *_file);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /*H5_HAVE_PARALLEL*/
+
+#endif /* __H5FDmpiposix_H */
+
+
diff --git a/src/H5Farray.c b/src/H5Farray.c
index cefed52..d2f84be 100644
--- a/src/H5Farray.c
+++ b/src/H5Farray.c
@@ -171,27 +171,29 @@ H5F_arr_read(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
@@ -412,27 +414,29 @@ H5F_arr_write(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
diff --git a/src/H5Fistore.c b/src/H5Fistore.c
index a44ce5a..a9f7f56 100644
--- a/src/H5Fistore.c
+++ b/src/H5Fistore.c
@@ -45,8 +45,9 @@
#include "H5Sprivate.h" /* Dataspaces */
#include "H5Vprivate.h"
-/* MPIO driver needed for special checks */
+/* MPIO & MPIPOSIX drivers needed for special checks */
#include "H5FDmpio.h"
+#include "H5FDmpiposix.h"
/*
* Feature: If this constant is defined then every cache preemption and load
@@ -1781,12 +1782,12 @@ H5F_istore_read(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
/*
- * If MPIO is used and file can be written to, we must bypass the
+ * If MPIO or MPIPOSIX is used and file can be written to, we must bypass the
* chunk-cache scheme because other MPI processes could be writing to
* other elements in the same chunk.
* Do a direct write-through of only the elements requested.
*/
- || (IS_H5FD_MPIO(f) && (H5F_ACC_RDWR & f->shared->flags))
+ || ((IS_H5FD_MPIO(f) ||IS_H5FD_MPIPOSIX(f)) && (H5F_ACC_RDWR & f->shared->flags))
#endif /* H5_HAVE_PARALLEL */
) {
H5O_layout_t l; /* temporary layout */
@@ -1965,11 +1966,11 @@ H5F_istore_write(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
/*
- * If MPIO is used, must bypass the chunk-cache scheme because other
+ * If MPIO or MPIPOSIX is used, must bypass the chunk-cache scheme because other
* MPI processes could be writing to other elements in the same chunk.
* Do a direct write-through of only the elements requested.
*/
- || (IS_H5FD_MPIO(f) && (H5F_ACC_RDWR & f->shared->flags))
+ || ((IS_H5FD_MPIO(f) ||IS_H5FD_MPIPOSIX(f)) && (H5F_ACC_RDWR & f->shared->flags))
#endif /* H5_HAVE_PARALLEL */
) {
H5O_layout_t l; /* temporary layout */
@@ -2416,10 +2417,22 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
} /* end if */
/* Retrieve up MPI parameters */
- if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
- if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ if(IS_H5FD_MPIO(f)) {
+ if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
+ if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ } /* end if */
+ else {
+ /* Sanity Check */
+ assert(IS_H5FD_MPIPOSIX(f));
+
+ /* Get the MPI rank & size */
+ if ((mpi_rank=H5FD_mpiposix_mpi_rank(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
+ if ((mpi_size=H5FD_mpiposix_mpi_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+ } /* end else */
/* Loop over all chunks */
carry=0;
@@ -2469,8 +2482,17 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
* still writing out chunks and other processes race ahead to read
* them in, getting bogus data.
*/
- if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ if(IS_H5FD_MPIO(f)) {
+ if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ } /* end if */
+ else {
+ /* Sanity Check */
+ assert(IS_H5FD_MPIPOSIX(f));
+
+ if (MPI_Barrier(H5FD_mpiposix_communicator(f->shared->lf)))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ } /* end else */
} /* end if */
done:
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index 10ed39b..189cbc7 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -161,6 +161,7 @@ struct H5F_t {
#ifdef H5_HAVE_PARALLEL
__DLLVAR__ hbool_t H5_mpi_1_metawrite_g;
+__DLLVAR__ hbool_t H5_mpiposix_1_metawrite_g;
#endif /* H5_HAVE_PARALLEL */
/* Private functions, not part of the publicly documented API */
diff --git a/src/H5Fseq.c b/src/H5Fseq.c
index 0a4da84..8f988ff 100644
--- a/src/H5Fseq.c
+++ b/src/H5Fseq.c
@@ -26,8 +26,9 @@
#include "H5Pprivate.h"
#include "H5Vprivate.h"
-/* MPIO driver functions are needed for some special checks */
+/* MPIO & MPIPOSIX driver functions are needed for some special checks */
#include "H5FDmpio.h"
+#include "H5FDmpiposix.h"
/* Interface initialization */
#define PABLO_MASK H5Fseq_mask
@@ -182,27 +183,29 @@ H5F_seq_readv(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
@@ -564,27 +567,29 @@ H5F_seq_writev(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
#ifdef H5_HAVE_PARALLEL
{
- /* Get the transfer mode */
H5FD_mpio_dxpl_t *dx;
hid_t driver_id; /* VFL driver ID */
- /* Get the plist structure */
- if(NULL == (plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Check if we are using the MPIO driver */
- if(H5FD_MPIO==driver_id) {
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
-
- /* Check if we are not using independent I/O */
- if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
- xfer_mode = dx->xfer_mode;
+ /* Get the transfer mode for MPIO transfers */
+ if(IS_H5FD_MPIO(f)) {
+ /* Get the plist structure */
+ if(NULL == (plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID");
+
+ /* Get the driver ID */
+ if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
+
+ /* Check if we are using the MPIO driver (for the DXPL) */
+ if(H5FD_MPIO==driver_id) {
+ /* Get the driver information */
+ if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
+ HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
+
+ /* Check if we are not using independent I/O */
+ if(H5FD_MPIO_INDEPENDENT!=dx->xfer_mode)
+ xfer_mode = dx->xfer_mode;
+ } /* end if */
} /* end if */
}
diff --git a/src/Makefile.in b/src/Makefile.in
index 8875f15..78aef12 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -20,8 +20,8 @@ CLEAN=libhdf5.settings
## Source and object files for the library (lexicographically)...
LIB_SRC=H5.c H5A.c H5AC.c H5B.c H5D.c H5E.c H5F.c H5Farray.c H5Fcontig.c \
- H5Fistore.c H5Fseq.c H5FD.c H5FDsec2.c H5FDfamily.c H5FDmpio.c H5FDcore.c \
- H5FDmulti.c H5FDgass.c H5FDlog.c H5FDsrb.c H5FDstdio.c \
+ H5Fistore.c H5Fseq.c H5FD.c H5FDcore.c H5FDfamily.c H5FDgass.c H5FDlog.c \
+ H5FDmpio.c H5FDmpiposix.c H5FDmulti.c H5FDsec2.c H5FDsrb.c H5FDstdio.c \
H5FDstream.c H5FL.c H5G.c H5Gent.c H5Gnode.c H5Gstab.c H5HG.c H5HL.c H5I.c \
H5MF.c H5MM.c H5O.c H5Oattr.c H5Ocomp.c H5Ocont.c H5Odtype.c H5Oefl.c \
H5Ofill.c H5Olayout.c H5Omtime.c H5Oname.c H5Onull.c H5Osdspace.c \
@@ -37,11 +37,11 @@ MOSTLYCLEAN=H5detect.o H5detect.lo H5detect H5Tinit.o H5Tinit.lo H5Tinit.c
## Public header files (to be installed)...
PUB_HDR=H5public.h H5Apublic.h H5ACpublic.h H5Bpublic.h H5Dpublic.h H5Epublic.h \
- H5Fpublic.h H5FDpublic.h H5FDfamily.h H5FDgass.h H5FDmpio.h \
- H5FDlog.h H5FDsec2.h H5FDsrb.h H5FDstream.h H5FDcore.h H5FDmulti.h \
- H5FDstdio.h H5Gpublic.h H5HGpublic.h H5HLpublic.h H5Ipublic.h H5MMpublic.h \
- H5Opublic.h H5Ppublic.h H5Rpublic.h H5Spublic.h H5Tpublic.h H5Zpublic.h \
- H5pubconf.h hdf5.h H5api_adpt.h
+ H5Fpublic.h H5FDpublic.h H5FDcore.h H5FDfamily.h H5FDgass.h H5FDlog.h \
+ H5FDmpio.h H5FDmpiposix.h H5FDmulti.h H5FDsec2.h H5FDsrb.h H5FDstdio.h \
+ H5FDstream.h H5Gpublic.h H5HGpublic.h H5HLpublic.h H5Ipublic.h \
+ H5MMpublic.h H5Opublic.h H5Ppublic.h H5Rpublic.h H5Spublic.h H5Tpublic.h \
+ H5Zpublic.h H5pubconf.h hdf5.h H5api_adpt.h
## Other header files (not to be installed)...
PRIVATE_HDR=H5private.h H5Aprivate.h H5Apkg.h H5ACprivate.h H5Bprivate.h \
diff --git a/src/hdf5.h b/src/hdf5.h
index a5bd15b..e7657a3 100644
--- a/src/hdf5.h
+++ b/src/hdf5.h
@@ -41,13 +41,14 @@
/* Predefined file drivers */
#include "H5FDcore.h" /* Files stored entirely in memory */
#include "H5FDfamily.h" /* File families */
+#include "H5FDgass.h" /* Remote files using GASS I/O */
+#include "H5FDlog.h" /* sec2 driver with I/O logging (for debugging) */
#include "H5FDmpio.h" /* Parallel files using MPI-2 I/O */
+#include "H5FDmpiposix.h" /* Parallel files using combination MPI-2 & posix I/O */
+#include "H5FDmulti.h" /* Usage-partitioned file family */
#include "H5FDsec2.h" /* POSIX unbuffered file I/O */
-#include "H5FDstdio.h" /* Standard C buffered I/O */
#include "H5FDsrb.h" /* Remote access using SRB */
-#include "H5FDgass.h" /* Remote files using GASS I/O */
-#include "H5FDstream.h" /* in-memory files streamed via sockets */
-#include "H5FDmulti.h" /* Usage-partitioned file family */
-#include "H5FDlog.h" /* sec2 driver with I/O logging (for debugging) */
+#include "H5FDstdio.h" /* Standard C buffered I/O */
+#include "H5FDstream.h" /* In-memory files streamed via sockets */
#endif