summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5FDfphdf5.c1401
-rw-r--r--src/H5FDfphdf5.h40
-rw-r--r--src/Makefile.in13
3 files changed, 874 insertions, 580 deletions
diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c
index 689101b..2eddc55 100644
--- a/src/H5FDfphdf5.c
+++ b/src/H5FDfphdf5.c
@@ -14,10 +14,12 @@
#include "H5private.h" /* Library functions */
#include "H5ACprivate.h" /* Metadata cache */
+#include "H5Dprivate.h" /* Dataset functions */
#include "H5Eprivate.h" /* Error handling */
#include "H5Fprivate.h" /* Files */
#include "H5FDprivate.h" /* File driver */
#include "H5FDfphdf5.h" /* Flexible PHDF5 I/O file driver */
+#include "H5FDmpio.h" /* MPI I/O file driver */
#include "H5Iprivate.h" /* Object IDs */
#include "H5MMprivate.h" /* Memory allocation */
#include "H5Pprivate.h" /* Property lists */
@@ -37,15 +39,20 @@ static hid_t H5FD_FPHDF5_g = 0;
/*
* The description of a file belonging to this driver.
*
- * The EOF value is only used just after the file is opened in order for
+ * The FILE_ID field is an SAP defined value. When reading/writing to the
+ * SAP, this value should be sent.
+ *
+ * The EOF field is only used just after the file is opened in order for
* the library to determine whether the file is empty, truncated, or
- * okay. The MPIO driver doesn't bother to keep it updated since it's an
- * expensive operation.
+ * okay. The FPHDF5 driver doesn't bother to keep it updated since it's
+ * an expensive operation.
*/
typedef struct H5FP_fphdf5_t {
H5FD_t pub; /*Public stuff, must be first (ick!) */
+ unsigned file_id; /*ID used by the SAP */
MPI_File f; /*MPIO file handle */
MPI_Comm comm; /*Communicator */
+ MPI_Comm barrier_comm; /*Barrier communicator */
MPI_Info info; /*File information */
int mpi_rank; /*This process's rank */
int mpi_size; /*Total number of processes */
@@ -74,18 +81,21 @@ static herr_t H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr);
static haddr_t H5FD_fphdf5_get_eof(H5FD_t *_file);
static herr_t H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t fapl,
void **file_handle);
-static herr_t H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id,
- haddr_t addr, size_t size, void *buf);
+static herr_t H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
+ haddr_t addr, size_t size, void *buf);
static herr_t H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id,
- haddr_t addr, size_t size, const void *buf);
-static herr_t H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing);
+ haddr_t addr, size_t size, const void *buf);
+static herr_t H5FD_fphdf5_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing);
/*
* FPHDF5-specific file access properties
*/
typedef struct H5FD_fphdf5_fapl_t {
- MPI_Comm comm; /*communicator */
- MPI_Info info; /*file information */
+ MPI_Comm comm; /*communicator */
+ MPI_Comm barrier_comm; /*barrier communicator */
+ MPI_Info info; /*file information */
+ unsigned sap_rank; /*SAP's rank */
+ unsigned capt_rank; /*captain rank */
} H5FD_fphdf5_fapl_t;
/*
@@ -121,18 +131,6 @@ static const H5FD_class_t H5FD_fphdf5_g = {
H5FD_FLMAP_SINGLE, /*fl_map */
};
-/*
- * Global var to allow elimination of redundant metadata writes to be
- * controlled by the value of an environment variable.
- *
- * Use the elimination by default unless this is the Intel Red machine
- */
-#ifndef __PUMAGON__
-hbool_t H5_fphdf5_1_metawrite_g = TRUE;
-#else
-hbool_t H5_fphdf5_1_metawrite_g = FALSE;
-#endif
-
/* Interface initialization */
#define PABLO_MASK H5FD_fphdf5_mask
#define INTERFACE_INIT H5FD_fphdf5_init
@@ -218,26 +216,34 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Info info)
+H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Comm barrier_comm,
+ MPI_Info info, unsigned sap_rank)
{
H5FD_fphdf5_fapl_t fa;
H5P_genplist_t *plist;
+ int mrc, comm_size;
herr_t ret_value;
FUNC_ENTER_API(H5Pset_fapl_fphdf5, FAIL);
- H5TRACE3("e","iMcMi",fapl_id,comm,info);
+ H5TRACE5("e","iMcMcMiIu",fapl_id,comm,barrier_comm,info,sap_rank);
if (fapl_id == H5P_DEFAULT)
HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL,
"can't set values in default property list");
/* Check arguments */
- if ((plist = H5P_object_verify(fapl_id,H5P_FILE_ACCESS)) == NULL)
+ if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL)
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
+ if ((mrc = MPI_Comm_size(comm, &comm_size)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mrc);
+
/* Initialize driver specific properties */
fa.comm = comm;
+ fa.barrier_comm = barrier_comm;
fa.info = info;
+ fa.sap_rank = sap_rank;
+ fa.capt_rank = (sap_rank + 1) % comm_size;
ret_value = H5P_set_driver(plist, H5FD_FPHDF5, &fa);
@@ -265,14 +271,16 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
-H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm /*out*/, MPI_Info *info /*out*/)
+H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm, MPI_Comm *barrier_comm,
+ MPI_Info *info, unsigned *sap_rank, unsigned *capt_rank)
{
H5FD_fphdf5_fapl_t *fa;
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED;
FUNC_ENTER_API(H5Pget_fapl_fphdf5, FAIL);
- H5TRACE3("e","ixx",fapl_id,comm,info);
+ H5TRACE6("e","i*Mc*Mc*Mi*Iu*Iu",fapl_id,comm,barrier_comm,info,sap_rank,
+ capt_rank);
if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL)
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a file access list");
@@ -286,9 +294,18 @@ H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm /*out*/, MPI_Info *info /*out*/
if (comm)
*comm = fa->comm;
+ if (barrier_comm)
+ *barrier_comm = fa->barrier_comm;
+
if (info)
*info = fa->info;
+ if (sap_rank)
+ *sap_rank = fa->sap_rank;
+
+ if (capt_rank)
+ *capt_rank = fa->capt_rank;
+
done:
FUNC_LEAVE_API(ret_value);
}
@@ -323,6 +340,37 @@ done:
FUNC_LEAVE_NOAPI(ret_value);
}
+/*-------------------------------------------------------------------------
+ * Function: H5FD_fphdf5_barrier_communicator
+ * Purpose: Returns the MPI communicator for the file that can be
+ * used in an MPI_Barrier() statement for the client
+ * processes.
+ * Return: Success: The barrier communicator
+ * Failure: NULL
+ * Programmer: Bill Wendling
+ * 10. February 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+MPI_Comm
+H5FD_fphdf5_barrier_communicator(H5FD_t *_file)
+{
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
+ MPI_Comm ret_value;
+
+ FUNC_ENTER_NOAPI(H5FD_fphdf5_communicator, MPI_COMM_NULL);
+
+ /* check args */
+ assert(file);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
+
+ /* Set return value */
+ ret_value = file->barrier_comm;
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value);
+}
+
/*-------------------------------------------------------------------------
* Function: H5FD_fphdf5_mpi_rank
@@ -471,21 +519,104 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_fapl_get
- *
- * Purpose: Returns a file access property list which could be used to
- * create another file the same as this one.
+ * Function: H5Pset_dxpl_fphdf5
+ * Purpose: Set the data transfer property list DXPL_ID to use
+ * transfer mode XFER_MODE. The property list can then be
+ * used to control the I/O transfer mode during data I/O
+ * operations. The valid transfer modes are:
*
- * Return: Success: Ptr to new file access property list with all
- * fields copied from the file pointer.
- *
- * Failure: NULL
- *
- * Programmer: Robb Matzke
- * Friday, August 13, 1999
+ * H5FD_MPIO_INDEPENDENT:
+ * Use independent I/O access (the default).
*
+ * H5FD_MPIO_COLLECTIVE:
+ * Use collective I/O access.
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 10. February 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_dxpl_fphdf5(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode)
+{
+ H5P_genplist_t *plist;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_API(H5Pset_dxpl_fphdf5, FAIL);
+ H5TRACE2("e","iDt",dxpl_id,xfer_mode);
+
+ if (dxpl_id == H5P_DEFAULT)
+ HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL,
+ "can't set values in default property list");
+
+ /* Check arguments */
+ if ((plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER)) == NULL)
+ HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl");
+
+ if (xfer_mode != H5FD_MPIO_INDEPENDENT && xfer_mode != H5FD_MPIO_COLLECTIVE)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "incorrect xfer_mode");
+
+ /* Set the transfer mode */
+ if (H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value");
+
+ /* Initialize driver-specific properties */
+ ret_value = H5P_set_driver(plist, H5FD_MPIO, NULL);
+
+done:
+ FUNC_LEAVE_API(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_dxpl_fphdf5
+ * Purpose: Queries the transfer mode current set in the data
+ * transfer property list DXPL_ID. This is not collective.
+ * Return: Success: SUCCEED - with the transfer mode returned
+ * through the XFER_MODE argument if
+ * it is non-null.
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 10. February 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_dxpl_fphdf5(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode)
+{
+ H5P_genplist_t *plist;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_API(H5Pget_dxpl_fphdf5, FAIL);
+ H5TRACE2("e","i*Dt",dxpl_id,xfer_mode);
+
+ if ((plist = H5P_object_verify(dxpl_id,H5P_DATASET_XFER)) == NULL)
+ HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl");
+
+ if (H5P_get_driver(plist) != H5FD_FPHDF5)
+ HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver");
+
+ /* Get the transfer mode */
+ if (xfer_mode)
+ if (H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to get value");
+
+done:
+ FUNC_LEAVE_API(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_fphdf5_fapl_get
+ * Purpose: Returns a file access property list which could be used
+ * to create another file the same as this one.
+ * Return: Success: Ptr to new file access property list with all
+ * fields copied from the file pointer.
+ * Failure: NULL
+ * Programmer: Bill Wendling
+ * 07. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static void *
@@ -506,6 +637,7 @@ H5FD_fphdf5_fapl_get(H5FD_t *_file)
/* These should both be copied. --rpm, 1999-08-13 */
fa->comm = file->comm;
+ fa->barrier_comm = file->barrier_comm;
fa->info = file->info;
/* Set return value */
@@ -537,15 +669,17 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd
H5FP_fphdf5_t *file = NULL;
MPI_File fh;
int mpi_amode;
+ int mpi_rank;
+ int mpi_size;
int mrc;
MPI_Offset size;
- const H5FD_fphdf5_fapl_t *fa = NULL;
H5FD_fphdf5_fapl_t _fa;
+ const H5FD_fphdf5_fapl_t *fa = NULL;
H5P_genplist_t *plist;
H5FD_t *ret_value = NULL;
unsigned file_id;
unsigned req_id;
- MPI_Status status;
+ MPI_Status status;
/* Flag to indicate that the file was successfully opened */
unsigned file_opened = FALSE;
@@ -555,13 +689,14 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd
/* check args */
assert(name);
- /* Obtain a pointer to mpio-specific file access properties */
+ /* Obtain a pointer to fphdf5-specific file access properties */
if ((plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)) == NULL)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list");
if (fapl_id == H5P_FILE_ACCESS_DEFAULT || H5P_get_driver(plist) != H5FD_FPHDF5) {
- _fa.comm = MPI_COMM_SELF; /*default*/
- _fa.info = MPI_INFO_NULL; /*default*/
+ _fa.comm = MPI_COMM_SELF; /*default*/
+ _fa.barrier_comm = MPI_COMM_SELF; /*default*/
+ _fa.info = MPI_INFO_NULL; /*default*/
fa = &_fa;
} else {
fa = H5P_get_driver_info(plist);
@@ -584,15 +719,18 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd
file_opened = TRUE;
- if (H5FP_request_open(name, (int)strlen(name), H5FP_OBJ_FILE, maxaddr,
+ if (H5FP_request_open(name, (int)strlen(name), H5FP_OBJ_FILE, (MPI_Offset)maxaddr,
&file_id, &req_id) == FAIL)
- HGOTO_ERROR(H5E_IO, H5E_CANTOPENFILE, NULL,
+ HGOTO_ERROR(H5E_FPHDF5, H5E_CANTOPENFILE, NULL,
"can't inform SAP of file open");
+ if ((mrc = MPI_Comm_rank(H5FP_SAP_COMM, &mpi_rank)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(NULL, "MPI_Comm_rank failed", mrc);
+
HDmemset(&status, 0, sizeof(status));
/* Get the file ID from the SAP */
- if (H5FP_my_rank == H5FP_capt_rank)
+ if ((unsigned)mpi_rank == H5FP_capt_rank)
if ((mrc = MPI_Recv(&file_id, 1, MPI_UNSIGNED, (int)H5FP_sap_rank,
H5FP_TAG_FILE_ID, H5FP_SAP_COMM,
&status)) != MPI_SUCCESS)
@@ -600,18 +738,18 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd
/* Broadcast the file ID */
if ((mrc = MPI_Bcast(&file_id, 1, MPI_UNSIGNED,
- 0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
+ (int)H5FP_capt_rank, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc);
/* The captain rank will get the filesize and broadcast it. */
- if (H5FP_my_rank == H5FP_capt_rank)
+ if ((unsigned)mpi_rank == H5FP_capt_rank)
/* Get current file size */
if ((mrc = MPI_File_get_size(fh, &size)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mrc);
- /* Broadcast file-size */
+ /* Broadcast file size */
if ((mrc = MPI_Bcast(&size, sizeof(MPI_Offset), MPI_BYTE,
- 0, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
+ (int)H5FP_capt_rank, H5FP_SAP_BARRIER_COMM)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mrc);
/* Only if size > 0, truncate the file - if requested */
@@ -626,20 +764,24 @@ H5FD_fphdf5_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxadd
size = 0;
}
+ /* Grab the size of the communicator */
+ if ((mrc = MPI_Comm_size(H5FP_SAP_COMM, &mpi_size)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(NULL, "MPI_Comm_size failed", mrc);
+
/* Build the return value and initialize it */
if ((file = H5MM_calloc(sizeof(H5FP_fphdf5_t))) == NULL)
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed");
+ file->file_id = file_id;
file->f = fh;
file->comm = fa->comm;
file->info = fa->info;
- file->mpi_rank = H5FP_my_rank;
- file->mpi_size = H5FP_comm_size;
+ file->mpi_rank = mpi_rank;
+ file->mpi_size = mpi_size;
file->eof = H5FD_fphdf5_MPIOff_to_haddr(size);
/* Set return value */
ret_value = (H5FD_t *)file;
- ret_value->fphdf5_id = file_id; /* the file descriptor used in FPHDF5 */
done:
if (!ret_value && file_opened)
@@ -651,39 +793,36 @@ done:
/*-------------------------------------------------------------------------
* Function: H5FD_fphdf5_close
- *
- * Purpose: Closes a file. This is collective.
- *
- * Return: Success: Non-negative
- *
- * Failure: Negative
- *
- * Programmer: Unknown
- * January 30, 1998
- *
+ * Purpose: Closes a file. This is collective.
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 07. February 2003
* Modifications:
- * Robb Matzke, 1998-02-18
- * Added the ACCESS_PARMS argument.
- *
- * Robb Matzke, 1999-08-06
- * Modified to work with the virtual file layer.
*-------------------------------------------------------------------------
*/
static herr_t
H5FD_fphdf5_close(H5FD_t *_file)
{
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- int mpi_code; /* mpi return code */
- herr_t ret_value=SUCCEED; /* Return value */
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file;
+ H5FP_status_t status;
+ unsigned req_id;
+ int mrc;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_close, FAIL);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
/* MPI_File_close sets argument to MPI_FILE_NULL */
- if (MPI_SUCCESS != (mpi_code=MPI_File_close(&(file->f)/*in,out*/)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mpi_code);
+ if ((mrc = MPI_File_close(&file->f)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_close failed", mrc);
+
+ if (H5FP_request_close(_file, file->file_id, &req_id, &status) == FAIL)
+ HGOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL,
+ "can't inform SAP of file close");
/* Clean up other stuff */
H5MM_xfree(file);
@@ -694,44 +833,45 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_query
- *
- * Purpose: Set the flags that this VFL driver is capable of supporting.
- * (listed in H5FDpublic.h)
- *
- * Return: Success: non-negative
- *
- * Failure: negative
- *
- * Programmer: Quincey Koziol
- * Friday, August 25, 2000
- *
+ * Function: H5FD_fphdf5_query
+ * Purpose: Set the flags that this VFL driver is capable of
+ * supporting. (listed in H5FDpublic.h)
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 07. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static herr_t
H5FD_fphdf5_query(const H5FD_t UNUSED *_file, unsigned long *flags /* out */)
{
- herr_t ret_value=SUCCEED;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_query, FAIL);
/* Set the VFL feature flags that this driver supports */
- if(flags) {
- *flags=0;
- *flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
-
- /* Distinguish between updating the metadata accumulator on writes and
- * reads. This is particularly (perhaps only, even) important for MPI-I/O
- * where we guarantee that writes are collective, but reads may not be.
- * If we were to allow the metadata accumulator to be written during a
- * read operation, the application would hang.
+ if (flags) {
+ *flags = 0;
+
+ /* OK to aggregate metadata allocations */
+ *flags |= H5FD_FEAT_AGGREGATE_METADATA;
+
+ /*
+ * Distinguish between updating the metadata accumulator on
+ * writes and reads. This is particularly (perhaps only, even)
+ * important for MPI-I/O where we guarantee that writes are
+ * collective, but reads may not be. If we were to allow the
+ * metadata accumulator to be written during a read operation,
+ * the application would hang.
*/
- *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
- *flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
- } /* end if */
+ /* OK to accumulate metadata for faster writes */
+ *flags |= H5FD_FEAT_ACCUMULATE_METADATA_WRITE;
+
+ /* OK to aggregate "small" raw data allocations */
+ *flags |= H5FD_FEAT_AGGREGATE_SMALLDATA;
+ }
done:
FUNC_LEAVE_NOAPI(ret_value);
@@ -739,36 +879,31 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_get_eoa
- *
- * Purpose: Gets the end-of-address marker for the file. The EOA marker
- * is the first address past the last byte allocated in the
- * format address space.
- *
- * Return: Success: The end-of-address marker.
- *
- * Failure: HADDR_UNDEF
- *
- * Programmer: Robb Matzke
- * Friday, August 6, 1999
- *
+ * Function: H5FD_fphdf5_get_eoa
+ * Purpose: Gets the end-of-address marker for the file. The EOA
+ * marker is the first address past the last byte allocated
+ * in the format address space.
+ * Return: Success: The end-of-address marker.
+ * Failure: HADDR_UNDEF
+ * Programmer: Bill Wendling
+ * 07. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static haddr_t
H5FD_fphdf5_get_eoa(H5FD_t *_file)
{
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- haddr_t ret_value; /* Return value */
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file;
+ haddr_t ret_value;
FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eoa, HADDR_UNDEF);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
/* Set return value */
- ret_value=file->eoa;
+ ret_value = file->eoa;
done:
FUNC_LEAVE_NOAPI(ret_value);
@@ -776,33 +911,29 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_set_eoa
- *
- * Purpose: Set the end-of-address marker for the file. This function is
- * called shortly after an existing HDF5 file is opened in order
- * to tell the driver where the end of the HDF5 data is located.
- *
- * Return: Success: 0
- *
- * Failure: -1
- *
- * Programmer: Robb Matzke
- * Friday, August 6, 1999
- *
+ * Function: H5FD_fphdf5_set_eoa
+ * Purpose: Set the end-of-address marker for the file. This function
+ * is called shortly after an existing HDF5 file is opened
+ * in order to tell the driver where the end of the HDF5
+ * data is located.
+ * Return: Success: SUCCEED
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 06. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static herr_t
H5FD_fphdf5_set_eoa(H5FD_t *_file, haddr_t addr)
{
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- herr_t ret_value=SUCCEED; /* Return value */
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_set_eoa, FAIL);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
file->eoa = addr;
@@ -812,43 +943,38 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_get_eof
- *
- * Purpose: Gets the end-of-file marker for the file. The EOF marker
- * is the real size of the file.
- *
- * The MPIO driver doesn't bother keeping this field updated
- * since that's a relatively expensive operation. Fortunately
- * the library only needs the EOF just after the file is opened
- * in order to determine whether the file is empty, truncated,
- * or okay. Therefore, any MPIO I/O function will set its value
- * to HADDR_UNDEF which is the error return value of this
- * function.
- *
- * Return: Success: The end-of-address marker.
- *
- * Failure: HADDR_UNDEF
- *
- * Programmer: Robb Matzke
- * Friday, August 6, 1999
- *
+ * Function: H5FD_fphdf5_get_eof
+ * Purpose: Gets the end-of-file marker for the file. The EOF marker
+ * is the real size of the file.
+ *
+ * The FPHDF5 driver doesn't bother keeping this field updated
+ * since that's a relatively expensive operation.
+ * Fortunately the library only needs the EOF just after the
+ * file is opened in order to determine whether the file is
+ * empty, truncated, or okay. Therefore, any MPIO I/O
+ * function will set its value to HADDR_UNDEF which is the
+ * error return value of this function.
+ * Return: Success: The end-of-address marker
+ * Failure: HADDR_UNDEF
+ * Programmer: Bill Wendling
+ * 06. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static haddr_t
H5FD_fphdf5_get_eof(H5FD_t *_file)
{
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- haddr_t ret_value; /* Return value */
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
+ haddr_t ret_value;
FUNC_ENTER_NOAPI(H5FD_fphdf5_get_eof, HADDR_UNDEF);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
/* Set return value */
- ret_value=file->eof;
+ ret_value = file->eof;
done:
FUNC_LEAVE_NOAPI(ret_value);
@@ -856,227 +982,222 @@ done:
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_get_handle
- *
- * Purpose: Returns the file handle of MPIO file driver.
- *
- * Returns: Non-negative if succeed or negative if fails.
- *
- * Programmer: Raymond Lu
- * Sept. 16, 2002
- *
+ * Function: H5FD_fphdf5_get_handle
+ * Purpose: Returns the file handle of MPIO file driver.
+ * Returns: Success: SUCCEED
+ * Failure: FAIL
+ * Programmer: Bill Wendling
+ * 06. February 2003
* Modifications:
- *
*-------------------------------------------------------------------------
*/
static herr_t
H5FD_fphdf5_get_handle(H5FD_t *_file, hid_t UNUSED fapl, void** file_handle)
{
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file;
- herr_t ret_value = SUCCEED;
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t *)_file;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_get_handle, FAIL);
+
+ /* check args */
+ assert(file);
- if(!file_handle)
+ if (!file_handle)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid");
- *file_handle = &(file->f);
+ *file_handle = &file->f;
done:
FUNC_LEAVE_NOAPI(ret_value);
}
-
+
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_read
- *
- * Purpose: Reads SIZE bytes of data from FILE beginning at address ADDR
- * into buffer BUF according to data transfer properties in
- * DXPL_ID using potentially complex file and buffer types to
- * effect the transfer.
- *
- * Reading past the end of the MPI file returns zeros instead of
- * failing. MPI is able to coalesce requests from different
- * processes (collective or independent).
- *
- * Return: Success: Zero. Result is stored in caller-supplied
- * buffer BUF.
- *
- * Failure: -1, Contents of buffer BUF are undefined.
- *
- * Programmer: rky, 1998-01-30
- *
+ * Function: H5FD_fphdf5_read
+ * Purpose: Reads SIZE bytes of data from FILE beginning at address
+ * ADDR into buffer BUF according to data transfer
+ * properties in DXPL_ID using potentially complex file and
+ * buffer types to effect the transfer.
+ *
+ * Reading past the end of the MPI file returns zeros
+ * instead of failing. MPI is able to coalesce requests
+ * from different processes (collective or independent).
+ * Return: Success: SUCCEED - Result is stored in caller-supplied
+ * buffer BUF
+ * Failure: FAIL - Contents of buffer BUF are undefined
+ * Programmer: Bill Wendling
+ * 10. February 2003
* Modifications:
- * Robb Matzke, 1998-02-18
- * Added the ACCESS_PARMS argument.
- *
- * rky, 1998-04-10
- * Call independent or collective MPI read, based on
- * ACCESS_PARMS.
- *
- * Albert Cheng, 1998-06-01
- * Added XFER_MODE to control independent or collective MPI
- * read.
- *
- * rky, 1998-08-16
- * Use BTYPE, FTYPE, and DISP from access parms. The guts of
- * H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a
- * single dual-purpose routine.
- *
- * Robb Matzke, 1999-04-21
- * Changed XFER_MODE to XFER_PARMS for all H5F_*_read()
- * callbacks.
- *
- * Robb Matzke, 1999-07-28
- * The ADDR argument is passed by value.
- *
- * Robb Matzke, 1999-08-06
- * Modified to work with the virtual file layer.
- *
- * Quincey Koziol, 2002-05-14
- * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type
- * for the I/O transfer. Someday we might include code to decode
- * the MPI type used for more complicated transfers and call
- * MPI_Get_count all the time.
- *
- * Quincey Koziol - 2002/06/17
- * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use
- * the address of the dataset in MPI_File_set_view() calls, as
- * necessary.
- *
- * Quincey Koziol - 2002/06/24
- * Removed "lazy" MPI_File_set_view() calls, since they would fail
- * if the first I/O was a collective I/O using MPI derived types
- * and the next I/O was an independent I/O.
- *
*-------------------------------------------------------------------------
*/
static herr_t
-H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t addr, size_t size,
- void *buf/*out*/)
+H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
+ haddr_t addr, size_t size, void *buf)
{
-#if 0
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- const H5FD_fphdf5_dxpl_t *dx=NULL;
- H5FD_fphdf5_dxpl_t _dx;
- MPI_Offset mpi_off, mpi_disp;
- MPI_Status mpi_stat;
- int mpi_code; /* mpi return code */
- MPI_Datatype buf_type, file_type;
- int size_i, bytes_read, n;
- unsigned use_view_this_time=0;
- H5P_genplist_t *plist; /* Property list pointer */
- herr_t ret_value=SUCCEED;
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
+ MPI_Offset mpi_off;
+ MPI_Offset mpi_disp;
+ MPI_Status status;
+ int mrc;
+ MPI_Datatype buf_type;
+ MPI_Datatype file_type;
+ int size_i;
+ int bytes_read;
+ int n;
+ unsigned use_view_this_time = 0;
+ H5P_genplist_t *plist;
+ H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_INDEPENDENT;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_read, FAIL);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
- /* Make certain we have the correct type of property list */
- assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
- assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
+ assert(file->pub.driver_id == H5FD_FPHDF5);
assert(buf);
+ /* make certain we have the correct type of property list */
+ assert(H5I_get_type(dxpl_id) == H5I_GENPROP_LST);
+ assert(H5P_isa_class(dxpl_id, H5P_DATASET_XFER) == TRUE);
+
/* Portably initialize MPI status variable */
- HDmemset(&mpi_stat,0,sizeof(MPI_Status));
+ HDmemset(&status, 0, sizeof(MPI_Status));
+
+ /* Some numeric conversions */
+ if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL,
+ "can't convert from haddr_t to MPI offset");
- /* some numeric conversions */
- if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off/*out*/)<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off");
size_i = (int)size;
+
if ((hsize_t)size_i != size)
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i");
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size_t to int");
+
+ /* If metadata, check the metadata cache first */
+ if (mem_type != H5FD_MEM_DRAW) {
+ /*
+ * This is metadata - we want to try to read it from the SAP
+ * first.
+ */
+ H5FP_status_t sap_status;
+ unsigned req_id;
+
+ if (H5FP_request_read_metadata(_file, file->file_id, mem_type, mpi_off,
+ size, (uint8_t**)&buf, &bytes_read, &req_id,
+ &sap_status) != SUCCEED) {
+ /* FIXME: The read failed, for some reason */
+HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__);
+ }
+
+ if (sap_status == H5FP_STATUS_OK) {
+ /* WAH-HOO! We've found it! We can leave now */
+ goto finished_read;
+ } else if (sap_status != H5FP_STATUS_MDATA_NOT_CACHED) {
+ /* FIXME: something bad happened */
+HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__);
+ }
+ }
/* Obtain the data transfer properties */
- if(NULL == (plist = H5I_object(dxpl_id)))
+ if ((plist = H5I_object(dxpl_id)) == NULL)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
- if (H5FD_FPHDF5!=H5P_get_driver(plist)) {
- _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/
- dx = &_dx;
- } else {
- dx = H5P_get_driver_info(plist);
- assert(dx);
- }
-
+
+ if (H5P_get_driver(plist) == H5FD_FPHDF5)
+ /* Get the transfer mode */
+ xfer_mode = H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_MODE_NAME);
+
/*
- * Set up for a fancy xfer using complex types, or single byte block. We
- * wouldn't need to rely on the use_view field if MPI semantics allowed
- * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
- * could mean "use MPI_BYTE" by convention).
+ * Set up for a fancy xfer using complex types, or single byte block.
+ * We wouldn't need to rely on the use_view field if MPI semantics
+ * allowed us to test that btype == ftype == MPI_BYTE (or even
+ * MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention).
*/
- if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0)
- if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0)
+ if (H5P_exist_plist(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME) > 0)
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME, &use_view_this_time) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
if (use_view_this_time) {
- /* prepare for a full-blown xfer using btype, ftype, and disp */
- if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
+ /* Prepare for a full-blown xfer using btype, ftype, and disp */
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
+
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- /* When using types, use the address as the displacement for
+ /*
+ * When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp=mpi_off;
- mpi_off=0;
- } /* end if */
- else {
+ mpi_disp = mpi_off;
+ mpi_off = 0;
+ } else {
/*
* Prepare for a simple xfer of a contiguous block of bytes. The
* btype, ftype, and disp fields are not used.
*/
buf_type = MPI_BYTE;
file_type = MPI_BYTE;
- mpi_disp = 0; /* mpi_off is alread set */
- } /* end else */
+ mpi_disp = 0; /* mpi_off is already set */
+ }
/*
* Set the file view when we are using MPI derived types
*/
- if (use_view_this_time) {
+ if (use_view_this_time)
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
- } /* end if */
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)mpi_disp, MPI_BYTE,
+ file_type, (char*)"native",
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
/* Read the data. */
- assert(H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode || H5FD_FPHDF5_COLLECTIVE==dx->xfer_mode);
- if (H5FD_FPHDF5_INDEPENDENT==dx->xfer_mode) {
- if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code);
+ assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE);
+
+ if (xfer_mode == H5FD_MPIO_INDEPENDENT) {
+ if ((mrc = MPI_File_read_at(file->f, mpi_off, buf, size_i,
+ buf_type, &status)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc);
} else {
- if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code);
+ if ((mrc = MPI_File_read_at_all(file->f, mpi_off, buf, size_i,
+ buf_type, &status )) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mrc);
}
- /* KLUDGE, Robb Matzke, 2000-12-29
+ /*
+ * KLUDGE, Robb Matzke, 2000-12-29
* The LAM implementation of MPI_Get_count() says
+ *
* MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
+ *
* So I'm commenting this out until it can be investigated. The
- * returned `bytes_written' isn't used anyway because of Kim's
- * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_write(). */
+ * returned `bytes_written' isn't used anyway because of Kim's kludge
+ * to avoid bytes_written < 0. Likewise in H5FD_fphdf5_write().
+ */
#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
- /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
+ /*
+ * Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
* Many systems don't support MPI_Get_count so we need to do a
- * configure thingy to fix this. */
+ * configure thingy to fix this.
+ */
- /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
- * had the 'buf_type' set to MPI_BYTE -QAK
+ /*
+ * Calling MPI_Get_count with "MPI_BYTE" is only valid when we
+ * actually had the 'buf_type' set to MPI_BYTE -QAK
*/
- if(use_view_this_time) {
- /* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
- * If this gets fixed (and MPI_Get_count() is reliable), the
- * kludge below where the 'bytes_read' value from MPI_Get_count() is
- * overwritten with the 'size_i' parameter can be removed. -QAK
+ if (use_view_this_time) {
+ /*
+ * Figure out the mapping from the MPI 'buf_type' to bytes,
+ * someday... If this gets fixed (and MPI_Get_count() is
+ * reliable), the kludge below where the 'bytes_read' value from
+ * MPI_Get_count() is overwritten with the 'size_i' parameter can
+ * be removed. -QAK
*/
- } /* end if */
- else {
+ } else {
/* How many bytes were actually read? */
- if (MPI_SUCCESS != (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_read)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code);
- } /* end else */
+ if ((mrc = MPI_Get_count(&status, MPI_BYTE, &bytes_read)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mrc);
+ }
#endif /* H5_HAVE_MPI_GET_COUNT */
/*
@@ -1086,355 +1207,507 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t a
*/
bytes_read = size_i;
- /* Check for read failure */
- if (bytes_read<0 || bytes_read>size_i)
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
-
/*
* Reset the file view when we used MPI derived types
*/
- if (use_view_this_time) {
+ if (use_view_this_time)
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
- } /* end if */
-
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
+ (char*)"native", file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
+
+finished_read:
+ /* Check for read failure */
+ if (bytes_read < 0 || bytes_read > size_i)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
+
/*
- * This gives us zeroes beyond end of physical MPI file. What about
+ * This gives us zeroes beyond end of physical MPI file. What about
* reading past logical end of HDF5 file???
*/
- if ((n=(size_i-bytes_read)) > 0) {
- if (use_view_this_time) {
+ n = size_i - bytes_read;
+
+ if (n > 0) {
+ if (use_view_this_time)
/*
* INCOMPLETE rky 1998-09-18
* Haven't implemented reading zeros beyond EOF. What to do???
*/
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed");
- } else {
- memset((char*)buf+bytes_read, 0, (size_t)n);
- }
+
+ memset((char*)buf + bytes_read, 0, (size_t)n);
}
done:
FUNC_LEAVE_NOAPI(ret_value);
-#else
- return SUCCEED;
-#endif
}
/*-------------------------------------------------------------------------
- * Function: H5FD_fphdf5_write
- *
- * Purpose: Writes SIZE bytes of data to FILE beginning at address ADDR
- * from buffer BUF according to data transfer properties in
- * DXPL_ID using potentially complex file and buffer types to
- * effect the transfer.
- *
- * MPI is able to coalesce requests from different processes
- * (collective and independent).
- *
- * Return: Success: Zero. USE_TYPES and OLD_USE_TYPES in the
- * access params are altered.
- *
- * Failure: -1, USE_TYPES and OLD_USE_TYPES in the
- * access params may be altered.
- *
- * Programmer: Unknown
- * January 30, 1998
- *
+ * Function: H5FD_fphdf5_write
+ * Purpose: Writes SIZE bytes of data to FILE beginning at address
+ * ADDR from buffer BUF according to data transfer
+ * properties in DXPL_ID using potentially complex file and
+ * buffer types to effect the transfer.
+ *
+ * MPI is able to coalesce requests from different processes
+ * (collective and independent).
+ * Return: Success: SUCCEED - USE_TYPES and OLD_USE_TYPES in the
+ * access params are altered.
+ * Failure: FAIL - USE_TYPES and OLD_USE_TYPES in the
+ * access params may be altered.
+ * Programmer: Bill Wendling
+ * 10. February 2003
* Modifications:
- * rky, 1998-08-28
- * If the file->allsame flag is set, we assume that all the
- * procs in the relevant MPI communicator will write identical
- * data at identical offsets in the file, so only proc 0 will
- * write, and all other procs will wait for p0 to finish. This
- * is useful for writing metadata, for example. Note that we
- * don't _check_ that the data is identical. Also, the mechanism
- * we use to eliminate the redundant writes is by requiring a
- * call to H5FD_fphdf5_tas_allsame before the write, which is
- * rather klugey. Would it be better to pass a parameter to
- * low-level writes like H5F_block_write and H5F_low_write,
- * instead? Or...??? Also, when I created this mechanism I
- * wanted to minimize the difference in behavior between the old
- * way of doing things (i.e., all procs write) and the new way,
- * so the writes are eliminated at the very lowest level, here
- * in H5FD_fphdf5_write. It may be better to rethink that, and
- * short-circuit the writes at a higher level (e.g., at the
- * points in the code where H5FD_fphdf5_tas_allsame is called).
- *
- *
- * Robb Matzke, 1998-02-18
- * Added the ACCESS_PARMS argument.
- *
- * rky, 1998-04-10
- * Call independent or collective MPI write, based on
- * ACCESS_PARMS.
- *
- * rky, 1998-04-24
- * Removed redundant write from H5FD_fphdf5_write.
- *
- * Albert Cheng, 1998-06-01
- * Added XFER_MODE to control independent or collective MPI
- * write.
- *
- * rky, 1998-08-16
- * Use BTYPE, FTYPE, and DISP from access parms. The guts of
- * H5FD_fphdf5_read and H5FD_fphdf5_write should be replaced by a
- * single dual-purpose routine.
- *
- * rky, 1998-08-28
- * Added ALLSAME parameter to make all but proc 0 skip the
- * actual write.
- *
- * Robb Matzke, 1999-04-21
- * Changed XFER_MODE to XFER_PARMS for all H5FD_*_write()
- * callbacks.
- *
- * Robb Matzke, 1999-07-28
- * The ADDR argument is passed by value.
- *
- * Robb Matzke, 1999-08-06
- * Modified to work with the virtual file layer.
- *
- * Albert Cheng, 1999-12-19
- * When only-p0-write-allsame-data, p0 Bcasts the
- * ret_value to other processes. This prevents
- * a racing condition (that other processes try to
- * read the file before p0 finishes writing) and also
- * allows all processes to report the same ret_value.
- *
- * Kim Yates, Pat Weidhaas, 2000-09-26
- * Move block of coding where only p0 writes after the
- * MPI_File_set_view call.
- *
- * Quincey Koziol, 2002-05-10
- * Instead of always writing metadata from process 0, spread the
- * burden among all the processes by using a round-robin rotation
- * scheme.
- *
- * Quincey Koziol, 2002-05-10
- * Removed allsame code, keying off the type parameter instead.
- *
- * Quincey Koziol, 2002-05-14
- * Only call MPI_Get_count if we can use MPI_BYTE for the MPI type
- * for the I/O transfer. Someday we might include code to decode
- * the MPI type used for more complicated transfers and call
- * MPI_Get_count all the time.
- *
- * Quincey Koziol - 2002/06/17
- * Removed 'disp' parameter from H5FD_fphdf5_setup routine and use
- * the address of the dataset in MPI_File_set_view() calls, as
- * necessary.
- *
- * Quincey Koziol - 2002/06/24
- * Removed "lazy" MPI_File_set_view() calls, since they would fail
- * if the first I/O was a collective I/O using MPI derived types
- * and the next I/O was an independent I/O.
- *
- * Quincey Koziol - 2002/07/18
- * Added "block_before_meta_write" dataset transfer flag, which
- * is set during writes from a metadata cache flush and indicates
- * that all the processes must sync up before (one of them)
- * writing metadata.
- *
*-------------------------------------------------------------------------
*/
static herr_t
-H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
- size_t size, const void *buf)
+H5FD_fphdf5_write(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
+ haddr_t addr, size_t size, const void *buf)
{
-#if 0
- H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- const H5FD_fphdf5_dxpl_t *dx=NULL;
- H5FD_fphdf5_dxpl_t _dx;
- MPI_Offset mpi_off, mpi_disp;
- MPI_Status mpi_stat;
- MPI_Datatype buf_type, file_type;
- int mpi_code; /* MPI return code */
- int size_i, bytes_written;
- unsigned use_view_this_time=0;
- unsigned block_before_meta_write=0; /* Whether to block before a metadata write */
- H5P_genplist_t *plist; /* Property list pointer */
- herr_t ret_value=SUCCEED;
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
+ MPI_Offset mpi_off;
+ MPI_Offset mpi_disp;
+ MPI_Status status;
+ MPI_Datatype buf_type;
+ MPI_Datatype file_type;
+ int mrc;
+ int size_i;
+ int bytes_written;
+ unsigned use_view_this_time = 0;
+ unsigned block_before_meta_write = 0;
+ H5P_genplist_t *plist;
+ H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_INDEPENDENT;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_write, FAIL);
+ /* check args */
assert(file);
- assert(H5FD_FPHDF5==file->pub.driver_id);
- /* Make certain we have the correct type of property list */
- assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
- assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
+ assert(file->pub.driver_id == H5FD_FPHDF5);
assert(buf);
+ /* Make certain we have the correct type of property list */
+ assert(H5I_get_type(dxpl_id) == H5I_GENPROP_LST);
+ assert(H5P_isa_class(dxpl_id, H5P_DATASET_XFER) == TRUE);
+
/* Portably initialize MPI status variable */
- HDmemset(&mpi_stat,0,sizeof(MPI_Status));
+ HDmemset(&status, 0, sizeof(MPI_Status));
/* some numeric conversions */
- if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off)<0)
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off");
+ if (H5FD_fphdf5_haddr_to_MPIOff(addr, &mpi_off) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL,
+ "can't convert from haddr to MPI off");
+
size_i = (int)size;
+
if ((hsize_t)size_i != size)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i");
+
+ /* FIXME: FPHDF5 stuff should go here */
+
+ /* If metadata, write to the metadata cache */
+ if (mem_type != H5FD_MEM_DRAW) {
+#if 0
+ unsigned req_id;
+ H5FP_status_t sap_status;
+
+ if (H5FP_request_write_metadata(file, file->file_id, uint8_t *obj_oid,
+ mem_type, mpi_off, size,
+ buf, &req_id, &sap_status)) {
+ }
+#endif
+ } else {
+ }
+
+
/* Obtain the data transfer properties */
- if(NULL == (plist = H5I_object(dxpl_id)))
+ if ((plist = H5I_object(dxpl_id)) == NULL)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
- if (H5FD_FPHDF5!=H5P_get_driver(plist)) {
- _dx.xfer_mode = H5FD_FPHDF5_INDEPENDENT; /*the default*/
- dx = &_dx;
+
+ if (H5P_get_driver(plist) == H5FD_FPHDF5)
+ /* Get the transfer mode */
+ xfer_mode = H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_MODE_NAME);
+
+ /*
+ * Set up for a fancy xfer using complex types, or single byte block.
+ * We wouldn't need to rely on the use_view field if MPI semantics
+ * allowed us to test that btype == ftype == MPI_BYTE (or even
+ * MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention).
+ */
+ if (H5P_exist_plist(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME) > 0)
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME, &use_view_this_time) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
+
+ if (use_view_this_time) {
+ /* prepare for a full-blown xfer using btype, ftype, and disp */
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
+
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
+
+ /*
+ * When using types, use the address as the displacement for
+ * MPI_File_set_view and reset the address for the read to zero
+ */
+ mpi_disp = mpi_off;
+ mpi_off = 0;
} else {
- dx = H5P_get_driver_info(plist);
- assert(dx);
+ /*
+ * Prepare for a simple xfer of a contiguous block of bytes. The
+ * btype, ftype, and disp fields are not used.
+ */
+ buf_type = MPI_BYTE;
+ file_type = MPI_BYTE;
+ mpi_disp = 0; /* mpi_off is already set */
}
+
+ /*
+ * Set the file view when we are using MPI derived types
+ */
+ if (use_view_this_time)
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if ((mrc = MPI_File_set_view(file->f, mpi_disp, MPI_BYTE,
+ file_type, (char*)"native",
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
+ /* Metadata specific actions */
+ if (mem_type != H5FD_MEM_DRAW) {
+ /*
+ * Check if we need to syncronize all processes before attempting
+ * metadata write (Prevents race condition where the process
+ * writing the metadata goes ahead and writes the metadata to the
+ * file before all the processes have read the data,
+ * "transmitting" data from the "future" to the reading process.
+ * -QAK )
+ */
+ if (H5P_exist_plist(plist, H5AC_BLOCK_BEFORE_META_WRITE_NAME) > 0)
+ if (H5P_get(plist, H5AC_BLOCK_BEFORE_META_WRITE_NAME, &block_before_meta_write) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property");
+
+ if (block_before_meta_write)
+ if ((mrc = MPI_Barrier(file->comm)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mrc);
+ }
+
+ /* Write the data. */
+ assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE);
+
+ if (xfer_mode == H5FD_MPIO_INDEPENDENT) {
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf,
+ size_i, buf_type, &status)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mrc);
+ } else {
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if ((mrc = MPI_File_write_at_all(file->f, mpi_off, (void*)buf,
+ size_i, buf_type, &status)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mrc);
+ }
+
/*
- * Set up for a fancy xfer using complex types, or single byte block. We
- * wouldn't need to rely on the use_view field if MPI semantics allowed
- * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
- * could mean "use MPI_BYTE" by convention).
+ * KLUDGE, Robb Matzke, 2000-12-29
+ * The LAM implementation of MPI_Get_count() says
+ *
+ * MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
+ *
+ * So I'm commenting this out until it can be investigated. The
+ * returned `bytes_written' isn't used anyway because of Kim's kludge
+ * to avoid bytes_written<0. Likewise in H5FD_fphdf5_read().
*/
- if(H5P_exist_plist(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME)>0)
- if(H5P_get(plist,H5FD_FPHDF5_XFER_USE_VIEW_NAME,&use_view_this_time)<0)
+
+#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
+ /*
+ * Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
+ * Many systems don't support MPI_Get_count so we need to do a
+ * configure thingy to fix this.
+ */
+
+ /*
+ * Calling MPI_Get_count with "MPI_BYTE" is only valid when we
+ * actually had the 'buf_type' set to MPI_BYTE -QAK
+ */
+ if (use_view_this_time) {
+ /*
+ * Figure out the mapping from the MPI 'buf_type' to bytes,
+ * someday... If this gets fixed (and MPI_Get_count() is
+ * reliable), the kludge below where the 'bytes_written' value
+ * from MPI_Get_count() is overwritten with the 'size_i'
+ * parameter can be removed. -QAK
+ */
+ } else {
+ /* How many bytes were actually written? */
+ if ((mrc = MPI_Get_count(&status, MPI_BYTE, &bytes_written)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mrc);
+ }
+#endif /* H5_HAVE_MPI_GET_COUNT */
+
+ /*
+ * KLUGE rky, 1998-02-02
+ *
+ * MPI_Get_count incorrectly returns negative count; fake a complete
+ * write.
+ */
+ bytes_written = size_i;
+
+ /* Check for write failure */
+ if (bytes_written < 0 || bytes_written > size_i)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed");
+
+ /*
+ * Reset the file view when we used MPI derived types
+ */
+ if (use_view_this_time)
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
+ (char*)"native",
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
+
+ /* Forget the EOF value (see H5FD_fphdf5_get_eof()) --rpm 1999-08-06 */
+ file->eof = HADDR_UNDEF;
+
+done:
+ /* Guard against getting into metadate broadcast in failure cases */
+ if (ret_value != FAIL)
+ /*
+ * If only p<round> writes, need to broadcast the ret_value to
+ * other processes
+ */
+ if (mem_type != H5FD_MEM_DRAW) {
+ if ((mrc = MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE,
+ file->mpi_round, file->comm)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mrc);
+
+ /* Round-robin rotate to the next process */
+ file->mpi_round = (++file->mpi_round) % file->mpi_size;
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_fphdf5_write_real
+ * Purpose: Split off from the H5FD_fphdf5_write() function. It does
+ * the real work of writing to the file.
+ *
+ * Writes SIZE bytes of data to FILE beginning at address
+ * ADDR from buffer BUF according to data transfer
+ * properties in DXPL_ID using potentially complex file and
+ * buffer types to effect the transfer.
+ *
+ * MPI is able to coalesce requests from different processes
+ * (collective and independent).
+ * Return: Success: SUCCEED - USE_TYPES and OLD_USE_TYPES in the
+ * access params are altered.
+ * Failure: FAIL - USE_TYPES and OLD_USE_TYPES in the
+ * access params may be altered.
+ * Programmer: Bill Wendling
+ * 10. February 2003
+ * Modifications:
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
+ MPI_Offset mpi_off, int size, const void *buf)
+{
+ H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
+ MPI_Offset mpi_disp;
+ MPI_Status status;
+ MPI_Datatype buf_type;
+ MPI_Datatype file_type;
+ int mrc;
+ int size_i;
+ int bytes_written;
+ unsigned use_view_this_time = 0;
+ unsigned block_before_meta_write = 0;
+ H5P_genplist_t *plist;
+ H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_INDEPENDENT;
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI(H5FD_fphdf5_write_real, FAIL);
+
+ /* check args */
+ assert(file);
+ assert(file->pub.driver_id == H5FD_FPHDF5);
+ assert(buf);
+
+ /* Make certain we have the correct type of property list */
+ assert(H5I_get_type(dxpl_id) == H5I_GENPROP_LST);
+ assert(H5P_isa_class(dxpl_id, H5P_DATASET_XFER) == TRUE);
+
+ /* Portably initialize MPI status variable */
+ HDmemset(&status, 0, sizeof(MPI_Status));
+
+ /* Obtain the data transfer properties */
+ if ((plist = H5I_object(dxpl_id)) == NULL)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
+
+ if (H5P_get_driver(plist) == H5FD_FPHDF5)
+ /* Get the transfer mode */
+ xfer_mode = H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_MODE_NAME);
+
+ /*
+ * Set up for a fancy xfer using complex types, or single byte block.
+ * We wouldn't need to rely on the use_view field if MPI semantics
+ * allowed us to test that btype == ftype == MPI_BYTE (or even
+ * MPI_TYPE_NULL, which could mean "use MPI_BYTE" by convention).
+ */
+ if (H5P_exist_plist(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME) > 0)
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_USE_VIEW_NAME, &use_view_this_time) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
if (use_view_this_time) {
/* prepare for a full-blown xfer using btype, ftype, and disp */
- if(H5P_get(plist,H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- if(H5P_get(plist,H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
+
+ if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- /* When using types, use the address as the displacement for
+ /*
+ * When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp=mpi_off;
- mpi_off=0;
- } /* end if */
- else {
+ mpi_disp = mpi_off;
+ mpi_off = 0;
+ } else {
/*
- * Prepare for a simple xfer of a contiguous block of bytes.
- * The btype, ftype, and disp fields are not used.
+ * Prepare for a simple xfer of a contiguous block of bytes. The
+ * btype, ftype, and disp fields are not used.
*/
buf_type = MPI_BYTE;
file_type = MPI_BYTE;
- mpi_disp = 0; /* mpi_off is already set */
- } /* end else */
+ mpi_disp = 0; /* mpi_off is already set */
+ }
/*
* Set the file view when we are using MPI derived types
*/
- if (use_view_this_time) {
+ if (use_view_this_time)
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
- } /* end if */
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)mpi_disp, MPI_BYTE,
+ file_type, (char*)"native",
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
/* Metadata specific actions */
- if(type!=H5FD_MEM_DRAW) {
- /* Check if we need to syncronize all processes before attempting metadata write
- * (Prevents race condition where the process writing the metadata goes ahead
- * and writes the metadata to the file before all the processes have
- * read the data, "transmitting" data from the "future" to the reading
- * process. -QAK )
+ if (mem_type != H5FD_MEM_DRAW) {
+ /*
+ * Check if we need to syncronize all processes before attempting
+ * metadata write (Prevents race condition where the process
+ * writing the metadata goes ahead and writes the metadata to the
+ * file before all the processes have read the data,
+ * "transmitting" data from the "future" to the reading process.
+ * -QAK )
*/
- if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0)
- if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
+ if (H5P_exist_plist(plist, H5AC_BLOCK_BEFORE_META_WRITE_NAME) > 0)
+ if (H5P_get(plist, H5AC_BLOCK_BEFORE_META_WRITE_NAME, &block_before_meta_write) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property");
- if(block_before_meta_write)
- if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
-
- /* Only p<round> will do the actual write if all procs in comm write same metadata */
- if (H5_fphdf5_1_metawrite_g) {
- if (file->mpi_rank != file->mpi_round) {
- HGOTO_DONE(SUCCEED) /* skip the actual write */
- }
- }
- } /* end if */
+ if (block_before_meta_write)
+ if ((mrc = MPI_Barrier(file->comm)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mrc);
+ }
/* Write the data. */
- assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
- if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) {
+ assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE);
+
+ if (xfer_mode == H5FD_MPIO_INDEPENDENT) {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code);
+ if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf,
+ size_i, buf_type, &status)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mrc);
} else {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code);
+ if ((mrc = MPI_File_write_at_all(file->f, mpi_off, (void*)buf,
+ size_i, buf_type, &status)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mrc);
}
- /* KLUDGE, Robb Matzke, 2000-12-29
+ /*
+ * KLUDGE, Robb Matzke, 2000-12-29
* The LAM implementation of MPI_Get_count() says
+ *
* MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
+ *
* So I'm commenting this out until it can be investigated. The
- * returned `bytes_written' isn't used anyway because of Kim's
- * kludge to avoid bytes_written<0. Likewise in H5FD_fphdf5_read(). */
+ * returned `bytes_written' isn't used anyway because of Kim's kludge
+ * to avoid bytes_written<0. Likewise in H5FD_fphdf5_read().
+ */
#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
- /* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
+ /*
+ * Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
* Many systems don't support MPI_Get_count so we need to do a
- * configure thingy to fix this. */
+ * configure thingy to fix this.
+ */
- /* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
- * had the 'buf_type' set to MPI_BYTE -QAK
+ /*
+ * Calling MPI_Get_count with "MPI_BYTE" is only valid when we
+ * actually had the 'buf_type' set to MPI_BYTE -QAK
*/
- if(use_view_this_time) {
- /* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
- * If this gets fixed (and MPI_Get_count() is reliable), the
- * kludge below where the 'bytes_written' value from MPI_Get_count() is
- * overwritten with the 'size_i' parameter can be removed. -QAK
+ if (use_view_this_time) {
+ /*
+ * Figure out the mapping from the MPI 'buf_type' to bytes,
+ * someday... If this gets fixed (and MPI_Get_count() is
+ * reliable), the kludge below where the 'bytes_written' value
+ * from MPI_Get_count() is overwritten with the 'size_i'
+ * parameter can be removed. -QAK
*/
- } /* end if */
- else {
+ } else {
/* How many bytes were actually written? */
- if (MPI_SUCCESS!= (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_written)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code);
- } /* end else */
+ if ((mrc = MPI_Get_count(&status, MPI_BYTE, &bytes_written)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mrc);
+ }
#endif /* H5_HAVE_MPI_GET_COUNT */
/*
* KLUGE rky, 1998-02-02
+ *
* MPI_Get_count incorrectly returns negative count; fake a complete
* write.
*/
bytes_written = size_i;
/* Check for write failure */
- if (bytes_written<0 || bytes_written>size_i)
+ if (bytes_written < 0 || bytes_written > size_i)
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed");
/*
* Reset the file view when we used MPI derived types
*/
- if (use_view_this_time) {
+ if (use_view_this_time)
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code);
- } /* end if */
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
+ (char*)"native",
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc);
/* Forget the EOF value (see H5FD_fphdf5_get_eof()) --rpm 1999-08-06 */
file->eof = HADDR_UNDEF;
done:
/* Guard against getting into metadate broadcast in failure cases */
- if(ret_value!=FAIL) {
- /* if only p<round> writes, need to broadcast the ret_value to other processes */
- if ((type!=H5FD_MEM_DRAW) && H5_fphdf5_1_metawrite_g) {
- if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, file->mpi_round, file->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code);
+ if (ret_value != FAIL)
+ /*
+ * If only p<round> writes, need to broadcast the ret_value to
+ * other processes
+ */
+ if (mem_type != H5FD_MEM_DRAW) {
+ if ((mrc = MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE,
+ file->mpi_round, file->comm)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mrc);
/* Round-robin rotate to the next process */
- file->mpi_round = (++file->mpi_round)%file->mpi_size;
- } /* end if */
- } /* end if */
+ file->mpi_round = (++file->mpi_round) % file->mpi_size;
+ }
FUNC_LEAVE_NOAPI(ret_value);
-#else
- return SUCCEED;
-#endif
}
@@ -1468,16 +1741,16 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing)
+H5FD_fphdf5_flush(H5FD_t *_file, hid_t dxpl_id, unsigned closing)
{
#if 0
H5FP_fphdf5_t *file = (H5FP_fphdf5_t*)_file;
- int mpi_code; /* mpi return code */
+ int mrc; /* mpi return code */
MPI_Offset mpi_off;
herr_t ret_value=SUCCEED;
#ifdef OLD_WAY
uint8_t byte=0;
- MPI_Status mpi_stat;
+ MPI_Status status;
#endif /* OLD_WAY */
FUNC_ENTER_NOAPI(H5FD_fphdf5_flush, FAIL);
@@ -1487,7 +1760,7 @@ H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing)
#ifdef OLD_WAY
/* Portably initialize MPI status variable */
- HDmemset(&mpi_stat,0,sizeof(MPI_Status));
+ HDmemset(&status,0,sizeof(MPI_Status));
#endif /* OLD_WAY */
/* Extend the file to make sure it's large enough, then sync.
@@ -1499,27 +1772,27 @@ H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing)
if (0==file->mpi_rank) {
if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa-1, &mpi_off)<0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset");
- if (MPI_SUCCESS != (mpi_code=MPI_File_read_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code);
- if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code);
+ if (MPI_SUCCESS != (mrc=MPI_File_read_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &status)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc);
+ if (MPI_SUCCESS != (mrc=MPI_File_write_at(file->f, mpi_off, &byte, 1, MPI_BYTE, &status)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mrc);
} /* end if */
#else /* OLD_WAY */
if (H5FD_fphdf5_haddr_to_MPIOff(file->eoa, &mpi_off)<0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset");
/* Extend the file's size */
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_size(file->f, mpi_off)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mpi_code);
+ if (MPI_SUCCESS != (mrc=MPI_File_set_size(file->f, mpi_off)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_size failed", mrc);
- /* Don't let any proc return until all have extended the file.
+ /* Don't let any proc return until all have extended the file.
* (Prevents race condition where some processes go ahead and write
* more data to the file before all the processes have finished making
* it the shorter length, potentially truncating the file and dropping
* the new data written)
*/
- if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
+ if (MPI_SUCCESS!= (mrc=MPI_Barrier(file->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mrc);
#endif /* OLD_WAY */
/* Update the 'last' eoa value */
@@ -1528,8 +1801,8 @@ H5FD_fphdf5_flush(H5FD_t *_file, unsigned closing)
/* Only sync the file if we are not going to immediately close it */
if(!closing) {
- if (MPI_SUCCESS != (mpi_code=MPI_File_sync(file->f)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mpi_code);
+ if (MPI_SUCCESS != (mrc=MPI_File_sync(file->f)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_sync failed", mrc);
} /* end if */
done:
@@ -1573,7 +1846,7 @@ H5FD_fphdf5_MPIOff_to_haddr(MPI_Offset mpi_off)
*-------------------------------------------------------------------------
*/
static herr_t
-H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off /*out*/)
+H5FD_fphdf5_haddr_to_MPIOff(haddr_t addr, MPI_Offset *mpi_off)
{
herr_t ret_value = FAIL;
diff --git a/src/H5FDfphdf5.h b/src/H5FDfphdf5.h
index 46061ec..2aca0ed 100644
--- a/src/H5FDfphdf5.h
+++ b/src/H5FDfphdf5.h
@@ -15,6 +15,7 @@
#ifndef H5FDFPHDF5_H__
#define H5FDFPHDF5_H__
+#include "H5FDmpio.h"
#include "H5FDpublic.h"
#include "H5Ipublic.h"
@@ -42,16 +43,37 @@
extern "C" {
#endif /* __cplusplus */
-H5_DLL hid_t H5FD_fphdf5_init(void);
-H5_DLL herr_t H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm, MPI_Info info);
-H5_DLL herr_t H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm/*out*/,
- MPI_Info *info/*out*/);
+/*
+ *==--------------------------------------------------------------------------==
+ * API Functions
+ *==--------------------------------------------------------------------------==
+ */
+H5_DLL herr_t H5Pset_dxpl_fphdf5(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode);
+H5_DLL herr_t H5Pget_dxpl_fphdf5(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode);
+H5_DLL herr_t H5Pset_fapl_fphdf5(hid_t fapl_id, MPI_Comm comm,
+ MPI_Comm barrier_comm, MPI_Info info,
+ unsigned sap_rank);
+H5_DLL herr_t H5Pget_fapl_fphdf5(hid_t fapl_id, MPI_Comm *comm,
+ MPI_Comm *barrier_comm, MPI_Info *info,
+ unsigned *sap_rank, unsigned *capt_rank);
+
+/*
+ *==--------------------------------------------------------------------------==
+ * Private Library Functions
+ *==--------------------------------------------------------------------------==
+ */
+H5_DLL hid_t H5FD_fphdf5_init(void);
H5_DLL MPI_Comm H5FD_fphdf5_communicator(H5FD_t *_file);
-H5_DLL herr_t H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype,
- MPI_Datatype ftype, unsigned use_view);
-H5_DLL herr_t H5FD_fphdf5_teardown(hid_t dxpl_id);
-H5_DLL int H5FD_fphdf5_mpi_rank(H5FD_t *_file);
-H5_DLL int H5FD_fphdf5_mpi_size(H5FD_t *_file);
+H5_DLL MPI_Comm H5FD_fphdf5_barrier_communicator(H5FD_t *_file);
+H5_DLL herr_t H5FD_fphdf5_setup(hid_t dxpl_id, MPI_Datatype btype,
+ MPI_Datatype ftype, unsigned use_view);
+H5_DLL herr_t H5FD_fphdf5_teardown(hid_t dxpl_id);
+H5_DLL int H5FD_fphdf5_mpi_rank(H5FD_t *_file);
+H5_DLL int H5FD_fphdf5_mpi_size(H5FD_t *_file);
+
+H5_DLL herr_t H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t type,
+ hid_t dxpl_id, MPI_Offset mpi_off,
+ int size, const void *buf);
#ifdef __cplusplus
}
diff --git a/src/Makefile.in b/src/Makefile.in
index db8c717..b0e622a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -34,13 +34,12 @@ LIB_SRC=H5.c H5A.c H5AC.c H5B.c H5D.c H5E.c H5F.c H5Farray.c H5Fcontig.c \
H5FDmulti.c H5FDsec2.c H5FDsrb.c H5FDstdio.c H5FDstream.c H5FL.c \
H5FO.c H5FP.c H5FPclient.c H5FPserver.c H5FS.c H5G.c H5Gent.c \
H5Gnode.c H5Gstab.c H5HG.c H5HL.c H5I.c H5MF.c H5MM.c H5O.c H5Oattr.c \
- H5Obogus.c H5Ocont.c H5Odtype.c H5Oefl.c H5Ofill.c H5Ofphdf5.c \
- H5Olayout.c H5Omtime.c H5Oname.c H5Onull.c H5Opline.c H5Oplist.c \
- H5Osdspace.c H5Oshared.c H5Ostab.c H5P.c H5Pdcpl.c H5Pdxpl.c \
- H5Pfapl.c H5Pfcpl.c H5R.c H5RS.c H5S.c H5Sall.c H5Shyper.c H5Smpio.c \
- H5Snone.c H5Spoint.c H5Sselect.c H5ST.c H5T.c H5Tbit.c H5Tconv.c \
- H5Tinit.c H5Tvlen.c H5TB.c H5TS.c H5V.c H5Z.c H5Zdeflate.c \
- H5Zshuffle.c H5Zadler32.c
+ H5Obogus.c H5Ocont.c H5Odtype.c H5Oefl.c H5Ofill.c H5Olayout.c \
+ H5Omtime.c H5Oname.c H5Onull.c H5Opline.c H5Oplist.c H5Osdspace.c \
+ H5Oshared.c H5Ostab.c H5P.c H5Pdcpl.c H5Pdxpl.c H5Pfapl.c H5Pfcpl.c \
+ H5R.c H5RS.c H5S.c H5Sall.c H5Shyper.c H5Smpio.c H5Snone.c H5Spoint.c \
+ H5Sselect.c H5ST.c H5T.c H5Tbit.c H5Tconv.c H5Tinit.c H5Tvlen.c \
+ H5TB.c H5TS.c H5V.c H5Z.c H5Zdeflate.c H5Zshuffle.c H5Zadler32.c
LIB_OBJ=$(LIB_SRC:.c=.lo)