summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2005-08-13 03:09:38 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2005-08-13 03:09:38 (GMT)
commitc5aa1f7a5f55d2e482591ed4f7f4d2f3b84563eb (patch)
tree9db28b53a73d2816c1b46b7ec19e5f4dc38ca947
parent1ba06a8395d3223e7fde349fd5b733e2d8ac79ac (diff)
downloadhdf5-c5aa1f7a5f55d2e482591ed4f7f4d2f3b84563eb.zip
hdf5-c5aa1f7a5f55d2e482591ed4f7f4d2f3b84563eb.tar.gz
hdf5-c5aa1f7a5f55d2e482591ed4f7f4d2f3b84563eb.tar.bz2
[svn-r11236] Purpose:
Code cleanup & bug fix Description: Refactor recent collective chunk changes to simplify them. Also, compute the number of chunks by checking the number of objects in the chunk skip list instead of getting the boundaries of the selection and counting the chunks that it overlaps, which is faster and computes correct number for certain selections. Platforms tested: FreeBSD 4.11 (sleipnir) w/parallel Too minor to require h5committest
-rw-r--r--src/H5Dio.c719
-rw-r--r--src/H5Dpkg.h13
2 files changed, 306 insertions, 426 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 3570cf6..ac6ce9a 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -99,33 +99,19 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
const H5S_t *file_space, H5T_path_t *tpath,
hid_t src_id, hid_t dst_id, const void *buf);
#ifdef H5_HAVE_PARALLEL
-static herr_t
-H5D_io_assist_mpio(hid_t dxpl_id, H5D_dxpl_cache_t *dxpl_cache,
- hbool_t *xfer_mode_changed);
-static herr_t
-H5D_io_restore_mpio(hid_t dxpl_id);
-static htri_t
-H5D_get_collective_io_consensus(const H5F_t *file,
- const htri_t local_opinion,
- const unsigned flags);
-
-static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset,
- const H5S_t *mem_space,
- const H5S_t *file_space,
- int *min_chunkf);
-static herr_t
-H5D_obtain_duplicate_pid(hid_t dxpl_id,
- hid_t* dp_id,
- H5D_dxpl_cache_t **cache);
-
+static herr_t H5D_ioinfo_make_ind(H5D_io_info_t *io_info);
+static herr_t H5D_ioinfo_make_coll(H5D_io_info_t *io_info);
+static herr_t H5D_ioinfo_term(H5D_io_info_t *io_info);
+static htri_t H5D_get_collective_io_consensus(const H5F_t *file,
+ const htri_t local_opinion, const unsigned flags);
+static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
+ const fm_map *fm, int *min_chunkf);
#endif /* H5_HAVE_PARALLEL */
/* I/O info operations */
-static herr_t
-H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
- hid_t dp_dxpl_id, H5D_dxpl_cache_t *dp_dxpl_cache,
- const H5S_t *mem_space, const H5S_t *file_space,
- unsigned flags, hbool_t *use_par_opt_io, H5D_io_info_t *io_info);
+static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache,
+ hid_t dxpl_id, const H5S_t *mem_space, const H5S_t *file_space,
+ H5T_path_t *tpath, unsigned flags, H5D_io_info_t *io_info);
/* Chunk operations */
static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type,
@@ -658,13 +644,7 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
- hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */
- H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */
- H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */
- hid_t dp_id;
#ifdef H5_HAVE_PARALLEL
- hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
- H5FD_mpio_xfer_t xfer_mode;
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
@@ -699,7 +679,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
#ifdef H5_HAVE_PARALLEL
-
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only")
@@ -783,14 +762,8 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
assert(0 && "Unhandled layout type!");
} /* end switch */
-#ifdef H5_HAVE_PARALLEL
- /* Obtain duplicate property list id. This is used to handle
- collective chunk IO. */
- if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't obtain duplicated property id")
-#endif
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
@@ -799,7 +772,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
notice the following code should be removed after
a more general collective chunk IO algorithm is applied.
*/
-
if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
if(check_prop < 0)
@@ -807,7 +779,7 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if(check_prop > 0) {
if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
- if(!use_par_opt_io) {
+ if(!io_info.use_par_opt_io) {
new_value = 0;
if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
@@ -816,10 +788,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
}
/* end Test for collective chunk IO */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-
- /* Don't reset the transfer mode if we can't or won't use it */
- if(!use_par_opt_io || !H5T_path_noop(tpath))
- H5D_io_assist_mpio(dxpl_id, dxpl_cache, &xfer_mode_changed);
#endif /*H5_HAVE_PARALLEL*/
/* Determine correct I/O routine to invoke */
@@ -837,8 +805,8 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Restore xfer_mode due to the kludge */
- if (xfer_mode_changed)
- H5D_io_restore_mpio(dxpl_id);
+ if (io_info.xfer_mode_changed)
+ H5D_ioinfo_term(&io_info);
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@@ -900,14 +868,7 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
- hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */
- hid_t dp_id;
- H5D_dxpl_cache_t _dp_dxpl_cache; /* Data transfer property cache buffer */
- H5D_dxpl_cache_t *dp_dxpl_cache=&_dp_dxpl_cache; /* Data transfer property cache */
#ifdef H5_HAVE_PARALLEL
- hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */
- H5FD_mpio_xfer_t xfer_mode;
- int mpi_rank;
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
@@ -1042,12 +1003,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
default:
assert(0 && "Unhandled layout type!");
} /* end switch */
-#ifdef H5_HAVE_PARALLEL
- if(H5D_obtain_duplicate_pid(dxpl_id,&dp_id,&dp_dxpl_cache)<0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't obtain duplicated property id")
-#endif
+
/* Set up I/O operation */
- if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,dp_id,dp_dxpl_cache,mem_space,file_space,sconv_flags,&use_par_opt_io,&io_info)<0)
+ if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
@@ -1056,27 +1014,22 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
notice the following code should be removed after
a more general collective chunk IO algorithm is applied.
*/
-
if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
-
check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
if(check_prop < 0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
if(check_prop > 0) {
if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
- if(!use_par_opt_io) {
+ if(!io_info.use_par_opt_io) {
new_value = 0;
if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
}
}
}
+ /* end Test for collective chunk IO */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
-
- /* Don't reset the transfer mode if we can't or won't use it */
- if(!use_par_opt_io || !H5T_path_noop(tpath))
- H5D_io_assist_mpio(dxpl_id, dxpl_cache, &xfer_mode_changed);
#endif /*H5_HAVE_PARALLEL*/
/* Determine correct I/O routine to invoke */
@@ -1109,8 +1062,8 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Restore xfer_mode due to the kludge */
- if (xfer_mode_changed)
- H5D_io_restore_mpio(dxpl_id);
+ if (io_info.xfer_mode_changed)
+ H5D_ioinfo_term(&io_info);
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@@ -1684,13 +1637,11 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
uint8_t *tconv_buf = NULL; /*data type conv buffer */
uint8_t *bkg_buf = NULL; /*background buffer */
H5D_storage_t store; /*union of EFL and chunk pointer in file space */
- herr_t ret_value = SUCCEED; /*return value */
-
#ifdef H5_HAVE_PARALLEL
- int count_chunk,mpi_rank, mpi_code,min_num_chunk,is_regular,all_regular;
- hid_t temp_id;
- MPI_Comm comm;
+ int count_chunk; /* Number of chunks accessed */
+ int min_num_chunk; /* Number of chunks to access collectively */
#endif
+ herr_t ret_value = SUCCEED; /*return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read)
@@ -1720,15 +1671,18 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
#ifdef H5_HAVE_PARALLEL
if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
- if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0)
+ if(H5D_mpio_get_min_chunk(io_info, &fm, &min_num_chunk)<0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk")
}
count_chunk = 0;
-#endif
+#endif /* H5_HAVE_PARALLEL */
/* Iterate through chunks to be operated on */
while(chunk_node) {
H5D_chunk_info_t *chunk_info; /* chunk information */
+#ifdef H5_HAVE_PARALLEL
+ hbool_t make_ind, make_coll; /* Flags to indicate that the MPI mode should change */
+#endif /* H5_HAVE_PARALLEL */
/* Get the actual chunk information from the skip list node */
chunk_info=H5SL_item(chunk_node);
@@ -1738,100 +1692,71 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
store.chunk.index = chunk_info->index;
#ifdef H5_HAVE_PARALLEL
- count_chunk++;
+ /* Reset flags for changing parallel I/O mode */
+ make_ind = make_coll = FALSE;
+
if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ /* Increment chunk we are operating on */
+ count_chunk++;
+
/* If the number of chunk is greater than minimum number of chunk,
Do independent read */
-
- if(count_chunk <= min_num_chunk) {
+ if(count_chunk > min_num_chunk) {
+ /* Switch to independent I/O (permanently) */
+ make_ind = TRUE;
+ }
#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ else {
+ int is_regular; /* If this chunk's selections are regular */
+ int mpi_code; /* MPI error code */
+ int all_regular = 0; /* If this chunk's selections are regular on all processes */
+
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
is_regular = 1;
else
is_regular = 0;
- /* Getting MPI communicator and rank */
- if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
- if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
-
if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
-#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
-
- if(count_chunk > min_num_chunk) {
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- io_info->dxpl_id = temp_id;
- }
- else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE) ||
- (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)) {
-#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- /* Perform the independent read operation */
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- io_info->dxpl_id = temp_id;
-#else /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- /* Perform the actual collective read operation */
- status = (io_info->ops.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
-#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
- else {
- /* For regular selection,
- if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
- unless spaces for all processors are regular, independent read operation should be performed.*/
-#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- if(!all_regular) {
- /* Perform the independent read operation */
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- io_info->dxpl_id = temp_id;
+ if(!is_regular) {
+ /* Switch to independent I/O (temporarily) */
+ make_ind = TRUE;
+ make_coll = TRUE;
}
else {
- /* For regular collective read in parallel*/
- /* Perform the read operation */
- status = (io_info->ops.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- }
-#else /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- /* For regular collective read in parallel*/
- /* Perform the read operation */
- status = (io_info->ops.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
+ /* For regular selection,
+ if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
+ unless spaces for all processors are regular, independent read operation should be performed.*/
+ if(!all_regular) {
+ /* Switch to independent I/O (temporarily) */
+ make_ind = TRUE;
+ make_coll = TRUE;
+ } /* end if */
+ } /* end else */
+ } /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
- }
- else {
- /* For regular independent read in parallel*/
- /* Perform the read operation */
- status = (io_info->ops.read)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- }
-#else /* H5_HAVE_PARALLEL */
- /* Perform the actual read operation for sequential*/
+ } /* end if */
+
+ /* Switch to independent I/O */
+ if(make_ind)
+ if(H5D_ioinfo_make_ind(io_info) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
+#endif /* H5_HAVE_PARALLEL */
+
+ /* Perform the actual read operation */
status = (io_info->ops.read)(io_info,
chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
chunk_info->fspace, chunk_info->mspace, buf);
+
+#ifdef H5_HAVE_PARALLEL
+ /* Switch back to collective I/O */
+ if(make_coll)
+ if(H5D_ioinfo_make_coll(io_info) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
#endif /* H5_HAVE_PARALLEL */
/* Check return value from optimized read */
@@ -2110,14 +2035,11 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
uint8_t *tconv_buf = NULL; /*data type conv buffer */
uint8_t *bkg_buf = NULL; /*background buffer */
H5D_storage_t store; /*union of EFL and chunk pointer in file space */
- herr_t ret_value = SUCCEED; /*return value */
-
#ifdef H5_HAVE_PARALLEL
- hid_t temp_id;
- int count_chunk,mpi_rank,mpi_code,min_num_chunk,is_regular,all_regular = 0;
- MPI_Comm comm;
-
+ int count_chunk; /* Number of chunks accessed */
+ int min_num_chunk; /* Number of chunks to access collectively */
#endif
+ herr_t ret_value = SUCCEED; /*return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write)
@@ -2139,18 +2061,21 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
#ifdef H5_HAVE_PARALLEL
if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
- if(H5D_mpio_get_mini_chunk(dataset,mem_space,file_space,&min_num_chunk)<0)
+ if(H5D_mpio_get_min_chunk(io_info, &fm, &min_num_chunk)<0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get minimum number of chunk")
}
count_chunk = 0;
-#endif
+#endif /* H5_HAVE_PARALLEL */
/* Get first node in chunk skip list */
chunk_node=H5SL_first(fm.fsel);
/* Iterate through chunks to be operated on */
while(chunk_node) {
- H5D_chunk_info_t *chunk_info; /* chunk information */
+ H5D_chunk_info_t *chunk_info; /* Chunk information */
+#ifdef H5_HAVE_PARALLEL
+ hbool_t make_ind, make_coll; /* Flags to indicate that the MPI mode should change */
+#endif /* H5_HAVE_PARALLEL */
/* Get the actual chunk information from the skip list node */
chunk_info=H5SL_item(chunk_node);
@@ -2160,99 +2085,71 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
store.chunk.index = chunk_info->index;
#ifdef H5_HAVE_PARALLEL
- count_chunk++;
+ /* Reset flags for changing parallel I/O mode */
+ make_ind = make_coll = FALSE;
+
if(io_info->dxpl_cache->xfer_mode == H5FD_MPIO_COLLECTIVE) {
+ /* Increment chunk we are operating on */
+ count_chunk++;
+
/* If the number of chunk is greater than minimum number of chunk,
Do independent write */
-
- if(count_chunk <= min_num_chunk) {
+ if(count_chunk > min_num_chunk) {
+ /* Switch to independent I/O (permanently) */
+ make_ind = TRUE;
+ }
#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+ else {
+ int is_regular; /* If this chunk's selections are regular */
+ int mpi_code; /* MPI error code */
+ int all_regular = 0; /* If this chunk's selections are regular on all processes */
+
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
is_regular = 1;
else
is_regular = 0;
- /* Getting MPI communicator and rank */
- if((comm = H5F_mpi_get_comm(dataset->ent.file))==MPI_COMM_NULL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
- if((mpi_rank = H5F_mpi_get_rank(dataset->ent.file))<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
-
if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
-#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
- if(count_chunk > min_num_chunk) {
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
-
- /* Check return value from optimized write */
- if (status<0)
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
- io_info->dxpl_id = temp_id;
- }
- else if((H5S_SELECT_IS_REGULAR(chunk_info->fspace) == FALSE) ||
- (H5S_SELECT_IS_REGULAR(chunk_info->mspace) == FALSE)) {
-
-#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- /* Perform the independent write operation */
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- io_info->dxpl_id = temp_id;
-#else /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- /* Perform the actual collective write operation */
- status = (io_info->ops.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
-#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
- else {
-#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
- if(!all_regular) {
- /* Perform the independent write operation */
- temp_id = io_info->dxpl_id;
- io_info->dxpl_id = io_info->dp_dxpl_id;
- status = (io_info->ops_sca.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- io_info->dxpl_id = temp_id;
- }
- else {
- /* For regular selection, perform the collective write operation */
- status = (io_info->ops.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
+ if(!is_regular) {
+ /* Switch to independent I/O (temporarily) */
+ make_ind = TRUE;
+ make_coll = TRUE;
}
-#else /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- status = (io_info->ops.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
+ else {
+ /* For regular selection,
+ if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
+ unless spaces for all processors are regular, independent read operation should be performed.*/
+ if(!all_regular) {
+ /* Switch to independent I/O (temporarily) */
+ make_ind = TRUE;
+ make_coll = TRUE;
+ } /* end if */
+ } /* end else */
+ } /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
- }
- }
- else {
- /* For independent parallel write*/
- /* Perform the write operation */
- status = (io_info->ops.write)(io_info,
- chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
- chunk_info->fspace, chunk_info->mspace, buf);
- }
+ } /* end if */
-#else /* H5_HAVE_PARALLEL */
- /* Perform the actual write operation for sequential*/
+ /* Switch to independent I/O */
+ if(make_ind)
+ if(H5D_ioinfo_make_ind(io_info) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
+#endif /* H5_HAVE_PARALLEL */
+
+ /* Perform the actual write operation */
status = (io_info->ops.write)(io_info,
chunk_info->chunk_points, H5T_get_size(dataset->shared->type),
chunk_info->fspace, chunk_info->mspace, buf);
+
+#ifdef H5_HAVE_PARALLEL
+ /* Switch back to collective I/O */
+ if(make_coll)
+ if(H5D_ioinfo_make_coll(io_info) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
#endif /* H5_HAVE_PARALLEL */
/* Check return value from optimized write */
@@ -2482,102 +2379,6 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5D_chunk_write() */
-#ifdef H5_HAVE_PARALLEL
-
-/*-------------------------------------------------------------------------
- * Function: H5D_io_assist_mpio
- *
- * Purpose: Common logic for determining if the MPI transfer mode should
- * be changed.
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: Raymond Lu
- * Thursday, April 10, 2003
- *
- * Modifications:
- * QAK - 2003/04/17
- * Hacked on it a lot. :-)
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5D_io_assist_mpio(hid_t dxpl_id, H5D_dxpl_cache_t *dxpl_cache,
- hbool_t *xfer_mode_changed)
-{
- herr_t ret_value = SUCCEED; /*return value */
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_io_assist_mpio)
-
- /* The following may not handle a collective call correctly
- * since it does not ensure all processes can handle the write
- * request according to the MPI collective specification.
- * Do the collective request via independent mode.
- */
- if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
- H5P_genplist_t *dx_plist; /* Data transer property list */
-
- /* Get the dataset transfer property list */
- if (NULL == (dx_plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
-
- /* Kludge: change the xfer_mode to independent, handle the request,
- * then xfer_mode before return.
- * Better way is to get a temporary data_xfer property with
- * INDEPENDENT xfer_mode and pass it downwards.
- */
- dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
- if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &dxpl_cache->xfer_mode) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
- *xfer_mode_changed=TRUE; /* restore it before return */
- } /* end if */
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D_io_assist_mpio() */
-
-
-/*-------------------------------------------------------------------------
- * Function: H5D_io_restore_mpio
- *
- * Purpose: Common logic for restoring MPI transfer mode
- *
- * Return: Non-negative on success/Negative on failure
- *
- * Programmer: Quincey Koziol
- * Friday, February 6, 2004
- *
- * Modifications:
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5D_io_restore_mpio(hid_t dxpl_id)
-{
- H5P_genplist_t *dx_plist; /* Data transer property list */
- H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */
- herr_t ret_value = SUCCEED; /*return value */
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_io_restore_mpio)
-
- /* Get the dataset transfer property list */
- if (NULL == (dx_plist = H5I_object(dxpl_id)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
-
- /* Kludge: change the xfer_mode to independent, handle the request,
- * then xfer_mode before return.
- * Better way is to get a temporary data_xfer property with
- * INDEPENDENT xfer_mode and pass it downwards.
- */
- xfer_mode = H5FD_MPIO_COLLECTIVE;
- if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D_io_restore_mpio() */
-#endif /*H5_HAVE_PARALLEL*/
-
/*-------------------------------------------------------------------------
* Function: H5D_create_chunk_map
@@ -3515,15 +3316,7 @@ done:
*/
static herr_t
H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
- hid_t
-#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
- UNUSED
-#endif /* H5_HAVE_PARALLEL */
- dp_dxpl_id, H5D_dxpl_cache_t
-#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
- UNUSED
-#endif /* H5_HAVE_PARALLEL */
- *dp_dxpl_cache, const H5S_t
+ const H5S_t
#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
UNUSED
#endif /* H5_HAVE_PARALLEL */
@@ -3531,15 +3324,16 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
#if !(defined H5_HAVE_PARALLEL || defined H5S_DEBUG)
UNUSED
#endif /* H5_HAVE_PARALLEL */
- *file_space, unsigned
+ *file_space, H5T_path_t
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
- flags, hbool_t
+ *tpath, unsigned
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
- *use_par_opt_io, H5D_io_info_t *io_info)
+ flags,
+ H5D_io_info_t *io_info)
{
#ifdef H5_HAVE_PARALLEL
htri_t opt; /* Flag whether a selection is optimizable */
@@ -3557,7 +3351,7 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
HDassert(dset->ent.file);
HDassert(mem_space);
HDassert(file_space);
- HDassert(use_par_opt_io);
+ HDassert(tpath);
HDassert(io_info);
/* Set up "normal" I/O fields */
@@ -3570,43 +3364,71 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
io_info->ops=dset->shared->io_ops;
#ifdef H5_HAVE_PARALLEL
+ /* Get MPI communicator */
+ if((io_info->comm = H5F_mpi_get_comm(dset->ent.file)) == MPI_COMM_NULL)
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
+
/*
* Check if we can set direct MPI-IO read/write functions
*/
- io_info->dp_dxpl_id = dp_dxpl_id;
- io_info->dp_dxpl_cache = dp_dxpl_cache;
-
opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags);
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace ");
opt = H5D_get_collective_io_consensus(dset->ent.file, opt, flags);
-
if ( opt == FAIL )
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, \
- "check for collective I/O consensus failed.");
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "check for collective I/O consensus failed.");
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */
io_info->ops.read = H5D_mpio_select_read;
io_info->ops.write = H5D_mpio_select_write;
- io_info->ops_sca.read = H5D_select_read;
- io_info->ops_sca.write = H5D_select_write;
/* Indicate that the I/O will use collective */
- *use_par_opt_io=TRUE;
+ io_info->use_par_opt_io=TRUE;
} /* end if */
else {
- /* Indicate that the I/O will _NOT_ be parallel, use independent IO */
- *use_par_opt_io=FALSE;
+ /* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
+ /* Indicate that the I/O will _NOT_ be parallel, use independent IO */
+ io_info->use_par_opt_io=FALSE;
} /* end else */
-#else
+
+ /* Start in the "not modified" state */
+ io_info->xfer_mode_changed=FALSE;
+
+ /* If we can't or won't be doing collective I/O, but the user
+ * asked for collective I/O, change the request to use independent I/O
+ */
+ if(!(io_info->use_par_opt_io && H5T_path_noop(tpath) &&
+ H5Z_xform_noop(dxpl_cache->data_xform_prop))
+ && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+
+ /* Get the dataset transfer property list */
+ if (NULL == (dx_plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
+
+ /* Change the xfer_mode to independent for handling the I/O */
+ io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
+ if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
+
+ /* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
+
+ /* Indicate that the transfer mode should be restored before returning
+ * to user.
+ */
+ io_info->xfer_mode_changed=TRUE;
+ } /* end if */
+#else /* H5_HAVE_PARALLEL */
+ io_info->ops.read = H5D_select_read;
+ io_info->ops.write = H5D_select_write;
#endif /* H5_HAVE_PARALLEL */
#ifdef H5S_DEBUG
@@ -3624,118 +3446,173 @@ done:
#ifdef H5_HAVE_PARALLEL
/*-------------------------------------------------------------------------
- * Function: H5D_mpio_get_mini_chunk
+ * Function: H5D_ioinfo_make_ind
*
- * Purpose: Routine for obtaining minimum number of chunks to cover
- * hyperslab selection selected by all processors.
+ * Purpose: Switch to MPI independent I/O
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer:
+ * Programmer: Quincey Koziol
+ * Friday, August 12, 2005
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
-static herr_t H5D_mpio_get_mini_chunk(const H5D_t *dset,
- const H5S_t *mem_space,
- const H5S_t *file_space,
- int *min_chunkf)
+static herr_t
+H5D_ioinfo_make_ind(H5D_io_info_t *io_info)
{
- hsize_t chunk_dim[H5O_LAYOUT_NDIMS]; /* Chunk dimensions */
- hsize_t startf[H5S_MAX_RANK], /* Selection start bounds */
- endf[H5S_MAX_RANK]; /* Selection end bounds */
- unsigned dim_rankf; /* Number of dimensions of file dataspace */
- int pcheck_hyper,check_hyper, /* Flags for checking if selection is in one chunk */
- tnum_chunkf, /* Number of chunks selection overlaps */
- max_chunkf, /* Maximum number of chunks selection overlaps */
- num_chunks_same; /* Flag indicating whether all processes have the same # of chunks to operate on */
- unsigned dim_chunks; /* Temporary number of chunks in a dimension */
- MPI_Comm comm; /* MPI communicator for file */
- int mpi_rank; /* Rank in MPI communicator */
- int mpi_code; /* MPI return code */
- unsigned u; /* Local index variable */
- herr_t ret_value = SUCCEED;
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+ herr_t ret_value = SUCCEED; /*return value */
- FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_get_mini_chunk);
+ FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_make_ind)
- /* Getting MPI communicator and rank */
- if((comm = H5F_mpi_get_comm(dset->ent.file))==MPI_COMM_NULL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
- if((mpi_rank = H5F_mpi_get_rank(dset->ent.file))<0)
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI rank")
+ /* Get the dataset transfer property list */
+ if (NULL == (dx_plist = H5I_object(io_info->dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
- dim_rankf = H5S_GET_EXTENT_NDIMS(file_space);
+ /* Change the xfer_mode to independent, handle the request,
+ * then set xfer_mode before return.
+ */
+ io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
+ if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
- /* Get selection bounds */
- if(H5S_SELECT_BOUNDS(file_space,startf,endf)==FAIL)
- HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE,FAIL, "invalid check for single selection blocks");
+ /* Set the pointers to the non-MPI-specific routines */
+ io_info->ops.read = H5D_select_read;
+ io_info->ops.write = H5D_select_write;
- /* Get chunk dimensions */
- for(u=0; u < dset->shared->layout.u.chunk.ndims; u++)
- chunk_dim[u] = dset->shared->layout.u.chunk.dim[u];
+ /* Indicate that the transfer mode should be restored before returning
+ * to user.
+ */
+ io_info->xfer_mode_changed=TRUE;
- /* Compute the number of chunks covered by the selection on this process */
- tnum_chunkf = 1;
- for (u=0; u<dim_rankf; u++) {
- dim_chunks = (endf[u]/chunk_dim[u]-startf[u]/chunk_dim[u])+1;
- tnum_chunkf = dim_chunks*tnum_chunkf;
- }
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D_ioinfo_make_ind() */
- /* Determine the minimum and maximum # of chunks for all processes */
- if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&tnum_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+
+/*-------------------------------------------------------------------------
+ * Function: H5D_ioinfo_make_coll
+ *
+ * Purpose: Switch to MPI collective I/O
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Quincey Koziol
+ * Friday, August 12, 2005
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D_ioinfo_make_coll(H5D_io_info_t *io_info)
+{
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+ herr_t ret_value = SUCCEED; /*return value */
- /* Broadcast the flag indicating the number of chunks are the same */
- if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_make_coll)
+
+ /* Get the dataset transfer property list */
+ if (NULL == (dx_plist = H5I_object(io_info->dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
+
+ /* Change the xfer_mode to independent, handle the request,
+ * then set xfer_mode before return.
+ */
+ io_info->dxpl_cache->xfer_mode = H5FD_MPIO_COLLECTIVE;
+ if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
+
+ /* Set the pointers to the MPI-specific routines */
+ io_info->ops.read = H5D_mpio_select_read;
+ io_info->ops.write = H5D_mpio_select_write;
+
+ /* Indicate that the transfer mode should _NOT_ be restored before returning
+ * to user.
+ */
+ io_info->xfer_mode_changed=FALSE;
done:
- FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5D_mpio_get_mini_chunk() */
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D_ioinfo_make_coll() */
/*-------------------------------------------------------------------------
- * Function: H5D_obtain_duplicate_pid
+ * Function: H5D_ioinfo_term
*
- * Purpose: Routine for obtaining a copy property list ID of
- * data transfer property.
+ * Purpose: Common logic for terminating an I/O info object
+ * (Only used for restoring MPI transfer mode currently)
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer:
+ * Programmer: Quincey Koziol
+ * Friday, February 6, 2004
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
-static herr_t H5D_obtain_duplicate_pid(hid_t dxpl_id,
- hid_t* dp_id,
- H5D_dxpl_cache_t **cache)
+static herr_t
+H5D_ioinfo_term(H5D_io_info_t *io_info)
{
- H5FD_mpio_xfer_t xfer_mode;
- H5P_genplist_t *dp_dx_plist; /* Data transer property list */
- herr_t ret_value=SUCCEED;
-
- FUNC_ENTER_NOAPI_NOINIT(H5D_obtain_duplicate_pid)
+ H5P_genplist_t *dx_plist; /* Data transer property list */
+ herr_t ret_value = SUCCEED; /*return value */
- *dp_id = H5Pcopy(dxpl_id);
+ FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_term)
/* Get the dataset transfer property list */
- if (NULL == (dp_dx_plist = H5I_object(*dp_id)))
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
+ if (NULL == (dx_plist = H5I_object(io_info->dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
- /* Switch to independent I/O */
- xfer_mode = H5FD_MPIO_INDEPENDENT;
- if(H5P_set (dp_dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0)
+ /* Restore the original parallel I/O mode */
+ io_info->dxpl_cache->xfer_mode = H5FD_MPIO_COLLECTIVE;
+ if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
- /* Fill the DXPL cache values for later use */
- if (H5D_get_dxpl_cache(*dp_id,cache)<0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
-
done:
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D_obtain_duplicate_pid() */
+} /* end H5D_ioinfo_term() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5D_mpio_get_min_chunk
+ *
+ * Purpose: Routine for obtaining minimum number of chunks to cover
+ * hyperslab selection selected by all processors.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer:
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
+ const fm_map *fm, int *min_chunkf)
+{
+ int num_chunkf; /* Number of chunks to iterate over */
+ int mpi_code; /* MPI return code */
+ herr_t ret_value = SUCCEED;
+
+ FUNC_ENTER_NOAPI_NOINIT(H5D_mpio_get_min_chunk);
+
+ /* Get the number of chunks to perform I/O on */
+ num_chunkf = H5SL_count(fm->fsel);
+
+ /* Determine the minimum and maximum # of chunks for all processes */
+ if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&num_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
+
+ /* Broadcast the flag indicating the number of chunks are the same */
+ if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,io_info->comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value);
+} /* end H5D_mpio_get_min_chunk() */
#endif /*H5_HAVE_PARALLEL*/
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index 3137f53..cf416f3 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -98,15 +98,18 @@ typedef struct H5D_io_ops_t {
/* Typedef for raw data I/O operation info */
typedef struct H5D_io_info_t {
H5D_t *dset; /* Pointer to dataset being operated on */
- const H5D_dxpl_cache_t *dxpl_cache; /* Pointer to cache DXPL info */
+#ifndef H5_HAVE_PARALLEL
+ const
+#endif /* H5_HAVE_PARALLEL */
+ H5D_dxpl_cache_t *dxpl_cache; /* Pointer to cache DXPL info */
hid_t dxpl_id; /* Original DXPL ID */
#ifdef H5_HAVE_PARALLEL
- hid_t dp_dxpl_id;
- H5D_dxpl_cache_t *dp_dxpl_cache;
-#endif
+ MPI_Comm comm; /* MPI communicator for file */
+ hbool_t use_par_opt_io; /* Whether the 'optimized' I/O routines with be parallel */
+ hbool_t xfer_mode_changed; /* Whether the transfer mode was changed */
+#endif /* H5_HAVE_PARALLEL */
const H5D_storage_t *store; /* Dataset storage info */
H5D_io_ops_t ops; /* I/O operation function pointers */
- H5D_io_ops_t ops_sca;
#ifdef H5S_DEBUG
H5S_iostats_t *stats; /* I/O statistics */
#endif /* H5S_DEBUG */