summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2003-10-27 21:38:27 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2003-10-27 21:38:27 (GMT)
commit8a43391a1157aa6bccb530cb1a39e6ad5e0e01e2 (patch)
tree6a803fcfb644aed148738c6d97b54cb287fce421
parent8dc1c9870a39b5e8ecb5f313661e27af60511fe3 (diff)
downloadhdf5-8a43391a1157aa6bccb530cb1a39e6ad5e0e01e2.zip
hdf5-8a43391a1157aa6bccb530cb1a39e6ad5e0e01e2.tar.gz
hdf5-8a43391a1157aa6bccb530cb1a39e6ad5e0e01e2.tar.bz2
[svn-r7754] Purpose:
Code cleanup. Description: Straighten out more goofiness in the MPI code dealing with collective I/O transfers - mostly make certain that a view is set if-and-only-if collective I/O is occurring on raw data (and vice versa for views and independent I/O). Platforms tested: FreeBSD 4.9 (sleipnir) w/parallel & FPHDF5; too minor to require h5committest.
-rw-r--r--src/H5Dio.c14
-rw-r--r--src/H5FDfphdf5.c48
-rw-r--r--src/H5FDmpio.c62
-rw-r--r--src/H5S.c20
-rw-r--r--src/H5Sprivate.h2
5 files changed, 89 insertions, 57 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 0fcc7ea..12d4dec 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -515,6 +515,7 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
H5S_conv_t *sconv=NULL; /*space conversion funcs*/
#ifdef H5_HAVE_PARALLEL
H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */
+ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines will be parallel */
hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
#endif /*H5_HAVE_PARALLEL*/
H5P_genplist_t *dx_plist=NULL; /* Data transfer property list */
@@ -637,11 +638,13 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
} /* end switch */
/* Get dataspace functions */
- if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags)))
+ if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags, &use_par_opt_io)))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert from file to memory data space")
#ifdef H5_HAVE_PARALLEL
- H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed);
+ /* Don't reset the transfer mode if we can't or won't use it */
+ if(!use_par_opt_io || !H5T_path_noop(tpath))
+ H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed);
#endif /*H5_HAVE_PARALLEL*/
/* Determine correct I/O routine to invoke */
@@ -737,6 +740,7 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
H5S_conv_t *sconv=NULL; /*space conversion funcs*/
#ifdef H5_HAVE_PARALLEL
H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */
+ hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines will be parallel */
hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
#endif /*H5_HAVE_PARALLEL*/
H5P_genplist_t *dx_plist=NULL; /* Data transfer property list */
@@ -857,11 +861,13 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
} /* end switch */
/* Get dataspace functions */
- if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags)))
+ if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags, &use_par_opt_io)))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert from memory to file data space")
#ifdef H5_HAVE_PARALLEL
- H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed);
+ /* Don't reset the transfer mode if we can't or won't use it */
+ if(!use_par_opt_io || !H5T_path_noop(tpath))
+ H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed);
#endif /*H5_HAVE_PARALLEL*/
/* Determine correct I/O routine to invoke */
diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c
index 59dbecf..577f51d 100644
--- a/src/H5FDfphdf5.c
+++ b/src/H5FDfphdf5.c
@@ -1161,7 +1161,6 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
{
H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file;
MPI_Offset mpi_off;
- MPI_Offset mpi_disp;
MPI_Status status;
int mrc;
MPI_Datatype buf_type;
@@ -1216,6 +1215,9 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
if (use_view_this_time) {
+ /* Sanity check that views will only be used by collective I/O */
+ assert(xfer_mode==H5FD_MPIO_COLLECTIVE);
+
/* Prepare for a full-blown xfer using btype, ftype, and disp */
if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
@@ -1223,26 +1225,27 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
+ /* Set the file view when we are using MPI derived types */
+ if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE,
+ file_type, H5FD_mpio_native,
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
+
/*
* When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp = mpi_off;
mpi_off = 0;
-
- /* Set the file view when we are using MPI derived types */
- if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)mpi_disp, MPI_BYTE,
- file_type, H5FD_mpio_native,
- file->info)) != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
} else {
+ /* Sanity check that independent I/O must be occurring */
+ assert(xfer_mode==H5FD_MPIO_INDEPENDENT);
+
/*
* Prepare for a simple xfer of a contiguous block of bytes. The
* btype, ftype, and disp fields are not used.
*/
buf_type = MPI_BYTE;
file_type = MPI_BYTE;
- mpi_disp = 0; /* mpi_off is already set */
}
/* If metadata, check the metadata cache first */
@@ -1271,9 +1274,7 @@ HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__);
}
/* Read the data. */
- assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE);
-
- if (xfer_mode == H5FD_MPIO_INDEPENDENT) {
+ if (!use_view_this_time) {
if ((mrc = MPI_File_read_at(file->f, mpi_off, buf, size_i,
buf_type, &status)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc)
@@ -1481,7 +1482,8 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size,
if (use_view_this_time) {
- MPI_Offset mpi_disp;
+ /* Sanity check that views will only be used by collective I/O */
+ assert(xfer_mode==H5FD_MPIO_COLLECTIVE);
/* Prepare for a full-blown xfer using btype, ftype, and disp */
if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
@@ -1490,19 +1492,21 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size,
if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
+ /* Set the file view when we are using MPI derived types */
+ if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE,
+ file_type, H5FD_mpio_native,
+ file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
+
/*
* When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp = mpi_off;
mpi_off = 0;
-
- /* Set the file view when we are using MPI derived types */
- if ((mrc = MPI_File_set_view(file->f, mpi_disp, MPI_BYTE,
- file_type, H5FD_mpio_native,
- file->info)) != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
} else {
+ /* Sanity check that independent I/O must be occurring */
+ assert(xfer_mode==H5FD_MPIO_INDEPENDENT);
+
/*
* Prepare for a simple xfer of a contiguous block of bytes. The
* btype and ftype.
@@ -1517,9 +1521,7 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size,
xfer_mode = H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_MODE_NAME);
/* Write the data. */
- assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE);
-
- if (xfer_mode == H5FD_MPIO_INDEPENDENT) {
+ if (!use_view_this_time) {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf,
size, buf_type, &status)) != MPI_SUCCESS)
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 5bd714b..4fefbde 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1470,7 +1470,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
void *buf/*out*/)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
- MPI_Offset mpi_off, mpi_disp;
+ MPI_Offset mpi_off;
MPI_Status mpi_stat;
int mpi_code; /* mpi return code */
MPI_Datatype buf_type, file_type;
@@ -1528,40 +1528,41 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
if (use_view_this_time) {
+ /* Sanity check that views will only be used by collective I/O */
+ assert(xfer_mode==H5FD_MPIO_COLLECTIVE);
+
/* prepare for a full-blown xfer using btype, ftype, and disp */
if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
+ /*
+ * Set the file view when we are using MPI derived types
+ */
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+
/* When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp=mpi_off;
mpi_off=0;
} /* end if */
else {
+ /* Sanity check that independent I/O must be occurring */
+ assert(xfer_mode==H5FD_MPIO_INDEPENDENT);
+
/*
* Prepare for a simple xfer of a contiguous block of bytes. The
* btype, ftype, and disp fields are not used.
*/
buf_type = MPI_BYTE;
file_type = MPI_BYTE;
- mpi_disp = 0; /* mpi_off is alread set */
} /* end else */
- /*
- * Set the file view when we are using MPI derived types
- */
- if (use_view_this_time) {
- /*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
- } /* end if */
-
/* Read the data. */
- assert(H5FD_MPIO_INDEPENDENT==xfer_mode || H5FD_MPIO_COLLECTIVE==xfer_mode);
- if (H5FD_MPIO_INDEPENDENT==xfer_mode) {
+ if (!use_view_this_time) {
if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
} else {
@@ -1776,7 +1777,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, const void *buf)
{
H5FD_mpio_t *file = (H5FD_mpio_t*)_file;
- MPI_Offset mpi_off, mpi_disp;
+ MPI_Offset mpi_off;
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int mpi_code; /* MPI return code */
@@ -1835,37 +1836,39 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
if (use_view_this_time) {
+ /* Sanity check that views will only be used by collective I/O */
+ assert(xfer_mode==H5FD_MPIO_COLLECTIVE);
+
/* prepare for a full-blown xfer using btype, ftype, and disp */
if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
+ /*
+ * Set the file view when we are using MPI derived types
+ */
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+
/* When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_disp=mpi_off;
mpi_off=0;
} /* end if */
else {
+ /* Sanity check that independent I/O must occur */
+ assert(xfer_mode==H5FD_MPIO_INDEPENDENT);
+
/*
* Prepare for a simple xfer of a contiguous block of bytes.
* The btype, ftype, and disp fields are not used.
*/
buf_type = MPI_BYTE;
file_type = MPI_BYTE;
- mpi_disp = 0; /* mpi_off is already set */
} /* end else */
- /*
- * Set the file view when we are using MPI derived types
- */
- if (use_view_this_time) {
- /*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
- } /* end if */
-
/* Metadata specific actions */
if(type!=H5FD_MEM_DRAW) {
/* Check if we need to syncronize all processes before attempting metadata write
@@ -1873,6 +1876,10 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
* and writes the metadata to the file before all the processes have
* read the data, "transmitting" data from the "future" to the reading
* process. -QAK )
+ *
+ * The only time we don't want to block before a metadata write is when
+ * we are flushing out a bunch of metadata. Then, we block before the
+ * first write and don't block for further writes in the sequence.
*/
if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0)
if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
@@ -1898,8 +1905,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
} /* end if */
/* Write the data. */
- assert(H5FD_MPIO_INDEPENDENT==xfer_mode || H5FD_MPIO_COLLECTIVE==xfer_mode);
- if (H5FD_MPIO_INDEPENDENT==xfer_mode) {
+ if (!use_view_this_time) {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
diff --git a/src/H5S.c b/src/H5S.c
index f69f8f5..e1d0327 100644
--- a/src/H5S.c
+++ b/src/H5S.c
@@ -1478,7 +1478,11 @@ H5S_find (const H5S_t *mem_space, const H5S_t *file_space, unsigned
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
-flags)
+flags, hbool_t
+#ifndef H5_HAVE_PARALLEL
+UNUSED
+#endif /* H5_HAVE_PARALLEL */
+*use_par_opt_io)
{
H5S_conv_t *path=NULL; /* Space conversion path */
#ifdef H5_HAVE_PARALLEL
@@ -1521,10 +1525,17 @@ flags)
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
+ /* Set the pointers to the MPI-specific routines */
H5S_conv_g[i]->read = H5S_mpio_spaces_read;
H5S_conv_g[i]->write = H5S_mpio_spaces_write;
+
+ /* Indicate that the I/O will be parallel */
+ *use_par_opt_io=TRUE;
} /* end if */
else {
+ /* Indicate that the I/O will _NOT_ be parallel */
+ *use_par_opt_io=FALSE;
+
#endif /* H5_HAVE_PARALLEL */
H5S_conv_g[i]->read = H5S_select_read;
H5S_conv_g[i]->write = H5S_select_write;
@@ -1556,10 +1567,17 @@ flags)
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
+ /* Set the pointers to the MPI-specific routines */
path->read = H5S_mpio_spaces_read;
path->write = H5S_mpio_spaces_write;
+
+ /* Indicate that the I/O will be parallel */
+ *use_par_opt_io=TRUE;
} /* end if */
else {
+ /* Indicate that the I/O will _NOT_ be parallel */
+ *use_par_opt_io=FALSE;
+
#endif /* H5_HAVE_PARALLEL */
path->read = H5S_select_read;
path->write = H5S_select_write;
diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h
index 389b3b1..46d302f 100644
--- a/src/H5Sprivate.h
+++ b/src/H5Sprivate.h
@@ -164,7 +164,7 @@ typedef struct H5S_conv_t {
H5_DLL H5S_t *H5S_copy(const H5S_t *src);
H5_DLL herr_t H5S_close(H5S_t *ds);
H5_DLL H5S_conv_t *H5S_find(const H5S_t *mem_space, const H5S_t *file_space,
- unsigned flags);
+ unsigned flags, hbool_t *use_par_opt_io);
H5_DLL H5S_class_t H5S_get_simple_extent_type(const H5S_t *ds);
H5_DLL hssize_t H5S_get_simple_extent_npoints(const H5S_t *ds);
H5_DLL hsize_t H5S_get_npoints_max(const H5S_t *ds);