summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJonathan Kim <jkm@hdfgroup.org>2012-09-04 17:00:38 (GMT)
committerJonathan Kim <jkm@hdfgroup.org>2012-09-04 17:00:38 (GMT)
commit0aa8d4d88a7d90cbb841bafacf54d97bc1d7a3f7 (patch)
tree0df0359ab13df0f84c4d69689007bcae2c9880e4 /src
parent3ab17f5e0409b717d9739577f8cc59ce460401bc (diff)
downloadhdf5-0aa8d4d88a7d90cbb841bafacf54d97bc1d7a3f7.zip
hdf5-0aa8d4d88a7d90cbb841bafacf54d97bc1d7a3f7.tar.gz
hdf5-0aa8d4d88a7d90cbb841bafacf54d97bc1d7a3f7.tar.bz2
[svn-r22735] Purpose:
HDFFV-8143 Provide a routine(s) for telling the user why the library broke collective data access Description: Added the H5Pget_mpio_no_collective_cause() function, which retrieves the reasons why collective I/O was broken during read/write I/O access. Reasons to break collective I/O: - SET_INDEPENDENT - DATATYPE_CONVERSION - DATA_TRANSFORMS - MPIPOSIX - NOT_SIMPLE_OR_SCALAR_DATASPACES (NULL Space) - POINT_SELECTIONS - NOT_CONTIGUOUS_OR_CHUNKED_DATASET (Compact or External-Storage) - FILTERS Tested: jam (linux32-LE), koala (linux64-LE), ostrich (linuxppc64-BE), tejeda (mac32-LE), linew (solaris-BE)
Diffstat (limited to 'src')
-rw-r--r--src/H5Dio.c2
-rw-r--r--src/H5Dmpio.c66
-rw-r--r--src/H5Dpkg.h3
-rw-r--r--src/H5Dprivate.h2
-rw-r--r--src/H5Pdxpl.c51
-rw-r--r--src/H5Ppublic.h14
-rw-r--r--src/H5trace.c54
7 files changed, 163 insertions, 29 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c
index e34452c..1bd6dae 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -997,7 +997,7 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, hid_t dxpl_id,
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
/* Check if we can set direct MPI-IO read/write functions */
- if((opt = H5D__mpio_opt_possible(io_info, file_space, mem_space, type_info, fm)) < 0)
+ if((opt = H5D__mpio_opt_possible(io_info, file_space, mem_space, type_info, fm, dx_plist)) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace ")
/* Check if we can use the optimized parallel I/O routines */
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 9b8fa27..c2d964e 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -156,10 +156,12 @@ static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info,
htri_t
H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
const H5S_t *mem_space, const H5D_type_info_t *type_info,
- const H5D_chunk_map_t *fm)
+ const H5D_chunk_map_t *fm, H5P_genplist_t *dx_plist)
{
- int local_opinion = TRUE; /* This process's idea of whether to perform collective I/O or not */
- int consensus; /* Consensus opinion of all processes */
+ /* variables to set cause of broken collective I/O */
+ int local_cause = 0;
+ int global_cause = 0;
+
int mpi_code; /* MPI error code */
htri_t ret_value = TRUE;
@@ -171,51 +173,54 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
HDassert(file_space);
HDassert(type_info);
+
/* For independent I/O, get out quickly and don't try to form consensus */
- if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_INDEPENDENT)
+ if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_INDEPENDENT) {
+ local_cause = H5D_MPIO_SET_INDEPENDENT;
+ global_cause = H5D_MPIO_SET_INDEPENDENT;
HGOTO_DONE(FALSE);
+ }
+
+ /* Optimized MPI types flag must be set and it must be collective IO */
+ /* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */
+ if(!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE
+ && !IS_H5FD_MPIPOSIX(io_info->dset->oloc.file))) {
+ local_cause |= H5D_MPIO_SET_MPIPOSIX;
+ } /* end if */
/* Don't allow collective operations if datatype conversions need to happen */
if(!type_info->is_conv_noop) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_DATATYPE_CONVERSION;
} /* end if */
/* Don't allow collective operations if data transform operations should occur */
if(!type_info->is_xform_noop) {
- local_opinion = FALSE;
- goto broadcast;
- } /* end if */
-
- /* Optimized MPI types flag must be set and it must be collective IO */
- /* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */
- if(!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE
- && !IS_H5FD_MPIPOSIX(io_info->dset->oloc.file))) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_DATA_TRANSFORMS;
} /* end if */
/* Check whether these are both simple or scalar dataspaces */
if(!((H5S_SIMPLE == H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(mem_space))
&& (H5S_SIMPLE == H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR == H5S_GET_EXTENT_TYPE(file_space)))) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES;
} /* end if */
/* Can't currently handle point selections */
if(H5S_SEL_POINTS == H5S_GET_SELECT_TYPE(mem_space)
|| H5S_SEL_POINTS == H5S_GET_SELECT_TYPE(file_space)) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_POINT_SELECTIONS;
} /* end if */
/* Dataset storage must be contiguous or chunked */
if(!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
io_info->dset->shared->layout.type == H5D_CHUNKED)) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
} /* end if */
+ /* check if external-file storage is used */
+ if (io_info->dset->shared->dcpl_cache.efl.nused > 0) {
+ local_cause |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
+ }
+
/* The handling of memory space is different for chunking and contiguous
* storage. For contiguous storage, mem_space and file_space won't change
* when it it is doing disk IO. For chunking storage, mem_space will
@@ -226,21 +231,28 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
/* Don't allow collective operations if filters need to be applied */
if(io_info->dset->shared->layout.type == H5D_CHUNKED) {
if(io_info->dset->shared->dcpl_cache.pline.nused > 0) {
- local_opinion = FALSE;
- goto broadcast;
+ local_cause |= H5D_MPIO_FILTERS;
} /* end if */
} /* end if */
-broadcast:
/* Form consensus opinion among all processes about whether to perform
* collective I/O
*/
- if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_opinion, &consensus, 1, MPI_INT, MPI_LAND, io_info->comm)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 1, MPI_INT, MPI_BOR, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
- ret_value = consensus > 0 ? TRUE : FALSE;
+ ret_value = global_cause > 0 ? FALSE : TRUE;
+
done:
+ /* Write the local value of no-collective-cause to the DXPL (use HDONE_ERROR: we are already past "done:", so a goto-style error macro would jump back here and could loop). */
+ if(H5P_set(dx_plist, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, &local_cause) < 0)
+ HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set local no collective cause property")
+
+ /* Write the global value of no-collective-cause (bitwise OR of every process's local cause) to the DXPL. */
+ if(H5P_set(dx_plist, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, &global_cause) < 0)
+ HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "couldn't set global no collective cause property")
+
FUNC_LEAVE_NOAPI(ret_value)
} /* H5D__mpio_opt_possible() */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index dfc19b8..ed6da8f 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -693,7 +693,8 @@ H5_DLL herr_t H5D__chunk_collective_write(H5D_io_info_t *io_info,
* memory and the file */
H5_DLL htri_t H5D__mpio_opt_possible(const H5D_io_info_t *io_info,
const H5S_t *file_space, const H5S_t *mem_space,
- const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm);
+ const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm,
+ H5P_genplist_t *dx_plist);
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Dprivate.h b/src/H5Dprivate.h
index 2211f79..85051c3 100644
--- a/src/H5Dprivate.h
+++ b/src/H5Dprivate.h
@@ -74,6 +74,8 @@
#define H5D_XFER_MPIO_CHUNK_OPT_RATIO_NAME "mpio_chunk_opt_ratio"
#define H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_NAME "actual_chunk_opt_mode"
#define H5D_MPIO_ACTUAL_IO_MODE_NAME "actual_io_mode"
+#define H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME "local_no_collective_cause" /* cause of broken collective I/O in each process */
+#define H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME "global_no_collective_cause" /* cause of broken collective I/O in all processes */
#endif /* H5_HAVE_PARALLEL */
#define H5D_XFER_EDC_NAME "err_detect" /* EDC */
#define H5D_XFER_FILTER_CB_NAME "filter_cb" /* Filter callback function */
diff --git a/src/H5Pdxpl.c b/src/H5Pdxpl.c
index d0e728c..2596d35 100644
--- a/src/H5Pdxpl.c
+++ b/src/H5Pdxpl.c
@@ -102,6 +102,9 @@
/* Definitions for chunk io mode property. */
#define H5D_MPIO_ACTUAL_IO_MODE_SIZE sizeof(H5D_mpio_actual_io_mode_t)
#define H5D_MPIO_ACTUAL_IO_MODE_DEF H5D_MPIO_NO_COLLECTIVE
+/* Definitions for cause of broken collective io property */
+#define H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE sizeof(H5D_mpio_no_collective_cause_t)
+#define H5D_MPIO_NO_COLLECTIVE_CAUSE_DEF H5D_MPIO_COLLECTIVE
/* Definitions for memory MPI type property */
#define H5FD_MPI_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype)
#define H5FD_MPI_XFER_MEM_MPI_TYPE_DEF MPI_DATATYPE_NULL
@@ -211,6 +214,7 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass)
unsigned def_mpio_chunk_opt_ratio = H5D_XFER_MPIO_CHUNK_OPT_RATIO_DEF;
H5D_mpio_actual_chunk_opt_mode_t def_mpio_actual_chunk_opt_mode = H5D_MPIO_ACTUAL_CHUNK_OPT_MODE_DEF;
H5D_mpio_actual_io_mode_t def_mpio_actual_io_mode = H5D_MPIO_ACTUAL_IO_MODE_DEF;
+ H5D_mpio_no_collective_cause_t def_mpio_no_collective_cause = H5D_MPIO_NO_COLLECTIVE_CAUSE_DEF;
MPI_Datatype btype = H5FD_MPI_XFER_MEM_MPI_TYPE_DEF; /* Default value for MPI buffer type */
MPI_Datatype ftype = H5FD_MPI_XFER_FILE_MPI_TYPE_DEF; /* Default value for MPI file type */
#endif /* H5_HAVE_PARALLEL */
@@ -287,6 +291,14 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass)
if(H5P_register_real(pclass, H5D_MPIO_ACTUAL_IO_MODE_NAME, H5D_MPIO_ACTUAL_IO_MODE_SIZE, &def_mpio_actual_io_mode, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+ /* Register the local cause of broken collective I/O (default: H5D_MPIO_NO_COLLECTIVE_CAUSE_DEF, held in def_mpio_no_collective_cause) */
+ if(H5P_register_real(pclass, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_no_collective_cause, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
+ /* Register the global cause of broken collective I/O (same size and default as the local cause property) */
+ if(H5P_register_real(pclass, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_no_collective_cause, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
+
/* Register the MPI memory type property */
if(H5P_register_real(pclass, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, H5FD_MPI_XFER_MEM_MPI_TYPE_SIZE,
&btype, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
@@ -1360,5 +1372,44 @@ H5Pget_mpio_actual_io_mode(hid_t plist_id, H5D_mpio_actual_io_mode_t *actual_io_
done:
FUNC_LEAVE_API(ret_value)
} /* end H5Pget_mpio_actual_io_mode() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_mpio_no_collective_cause
+ *
+ * Purpose: Retrieves the local and global causes recorded in a dataset transfer property list for why collective I/O was broken
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Jonathan Kim
+ * Aug 3, 2012
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_mpio_no_collective_cause(hid_t plist_id, H5D_mpio_no_collective_cause_t *local_no_collective_cause, H5D_mpio_no_collective_cause_t *global_no_collective_cause)
+{
+ H5P_genplist_t *plist;
+ herr_t ret_value = SUCCEED; /* Return value; set to FAIL by the error macros below */
+
+ FUNC_ENTER_API(FAIL)
+ H5TRACE3("e", "i*Dn*Dn", plist_id, local_no_collective_cause,
+ global_no_collective_cause);
+
+ /* Get the plist structure; fails unless plist_id verifies as H5P_DATASET_XFER */
+ if(NULL == (plist = H5P_object_verify(plist_id, H5P_DATASET_XFER)))
+ HGOTO_ERROR(H5E_ATOM, H5E_BADATOM, FAIL, "can't find object for ID")
+
+ /* Return values; either output pointer may be NULL, in which case that value is simply not fetched */
+ if(local_no_collective_cause)
+ if(H5P_get(plist, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, local_no_collective_cause) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to get local value")
+ if(global_no_collective_cause)
+ if(H5P_get(plist, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, global_no_collective_cause) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "unable to get global value")
+
+done:
+ FUNC_LEAVE_API(ret_value)
+} /* end H5Pget_mpio_no_collective_cause() */
+
+
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 654772a..fd75e86 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -153,6 +153,19 @@ typedef enum H5D_mpio_actual_io_mode_t {
H5D_MPIO_CONTIGUOUS_COLLECTIVE = 0x4
} H5D_mpio_actual_io_mode_t;
+/* Broken collective IO property: bit flags naming why collective I/O was not performed; several causes may be OR-ed together, and H5D_MPIO_COLLECTIVE (0) means no cause was recorded */
+typedef enum H5D_mpio_no_collective_cause_t {
+ H5D_MPIO_COLLECTIVE = 0x00,
+ H5D_MPIO_SET_INDEPENDENT = 0x01,
+ H5D_MPIO_DATATYPE_CONVERSION = 0x02,
+ H5D_MPIO_DATA_TRANSFORMS = 0x04,
+ H5D_MPIO_SET_MPIPOSIX = 0x08,
+ H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES = 0x10,
+ H5D_MPIO_POINT_SELECTIONS = 0x20,
+ H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET = 0x40,
+ H5D_MPIO_FILTERS = 0x80
+} H5D_mpio_no_collective_cause_t;
+
/********************/
/* Public Variables */
/********************/
@@ -399,6 +412,7 @@ H5_DLL herr_t H5Pget_type_conv_cb(hid_t dxpl_id, H5T_conv_except_func_t *op, voi
#ifdef H5_HAVE_PARALLEL
H5_DLL herr_t H5Pget_mpio_actual_chunk_opt_mode(hid_t plist_id, H5D_mpio_actual_chunk_opt_mode_t *actual_chunk_opt_mode);
H5_DLL herr_t H5Pget_mpio_actual_io_mode(hid_t plist_id, H5D_mpio_actual_io_mode_t *actual_io_mode);
+H5_DLL herr_t H5Pget_mpio_no_collective_cause(hid_t plist_id, H5D_mpio_no_collective_cause_t *local_no_collective_cause, H5D_mpio_no_collective_cause_t *global_no_collective_cause);
#endif /* H5_HAVE_PARALLEL */
/* Link creation property list (LCPL) routines */
diff --git a/src/H5trace.c b/src/H5trace.c
index b559669..2dab8ec 100644
--- a/src/H5trace.c
+++ b/src/H5trace.c
@@ -535,6 +535,60 @@ H5_trace(const double *returning, const char *func, const char *type, ...)
} /* end else */
break;
+ case 'n':
+ if(ptr) {
+ if(vp)
+ fprintf(out, "0x%lx", (unsigned long)vp);
+ else
+ fprintf(out, "NULL");
+ } /* end if */
+ else {
+ H5D_mpio_no_collective_cause_t nocol_cause_mode = (H5D_mpio_no_collective_cause_t)va_arg(ap, int);
+
+ switch(nocol_cause_mode) {
+ case H5D_MPIO_COLLECTIVE:
+ fprintf(out, "H5D_MPIO_COLLECTIVE");
+ break;
+
+ case H5D_MPIO_SET_INDEPENDENT:
+ fprintf(out, "H5D_MPIO_SET_INDEPENDENT");
+ break;
+
+ case H5D_MPIO_DATATYPE_CONVERSION:
+ fprintf(out, "H5D_MPIO_DATATYPE_CONVERSION");
+ break;
+
+ case H5D_MPIO_DATA_TRANSFORMS:
+ fprintf(out, "H5D_MPIO_DATA_TRANSFORMS");
+ break;
+
+ case H5D_MPIO_SET_MPIPOSIX:
+ fprintf(out, "H5D_MPIO_SET_MPIPOSIX");
+ break;
+
+ case H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES:
+ fprintf(out, "H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES");
+ break;
+
+ case H5D_MPIO_POINT_SELECTIONS:
+ fprintf(out, "H5D_MPIO_POINT_SELECTIONS");
+ break;
+
+ case H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET:
+ fprintf(out, "H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET");
+ break;
+
+ case H5D_MPIO_FILTERS:
+ fprintf(out, "H5D_MPIO_FILTERS");
+ break;
+
+ default:
+ fprintf(out, "%ld", (long)nocol_cause_mode);
+ break;
+ } /* end switch */
+ } /* end else */
+ break;
+
case 'o':
if(ptr) {
if(vp)