summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Neil Fortner <nfortne2@hdfgroup.org> 2021-09-16 19:43:57 (GMT)
committer GitHub <noreply@github.com> 2021-09-16 19:43:57 (GMT)
commit c1e73fd70ff3c49b05712a3fb7329f82fae3401c (patch)
tree a871d249a660cfda3648e381a7bf69417ad5a4ed
parent 19f94643389881e1f2c27c688c405f9ba5568c18 (diff)
parent 2023495f320ed22febe41eb6c95a4bc687a8b676 (diff)
download hdf5-c1e73fd70ff3c49b05712a3fb7329f82fae3401c.zip
hdf5-c1e73fd70ff3c49b05712a3fb7329f82fae3401c.tar.gz
hdf5-c1e73fd70ff3c49b05712a3fb7329f82fae3401c.tar.bz2
Merge pull request #986 from fortnern/parallel_selection_io
Add support for independent parallel I/O with selection I/O
-rw-r--r-- src/H5.c 10
-rw-r--r-- src/H5Dchunk.c 24
-rw-r--r-- src/H5Dcompact.c 4
-rw-r--r-- src/H5Dcontig.c 31
-rw-r--r-- src/H5Defl.c 4
-rw-r--r-- src/H5Dio.c 11
-rw-r--r-- src/H5Dpkg.h 3
-rw-r--r-- src/H5FDint.c 12
-rw-r--r-- src/H5FDmpio.c 130
9 files changed, 167 insertions, 62 deletions
diff --git a/src/H5.c b/src/H5.c
index cbd240a..0c7c8c1 100644
--- a/src/H5.c
+++ b/src/H5.c
@@ -147,6 +147,7 @@ done:
herr_t
H5_init_library(void)
{
+ char *env_use_select_io = NULL;
herr_t ret_value = SUCCEED;
/* Set the 'library initialized' flag as early as possible, to avoid
@@ -278,6 +279,15 @@ H5_init_library(void)
if (H5VL_init_phase2() < 0)
HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "unable to initialize vol interface")
+ /* Check for HDF5_USE_SELECTION_IO env variable */
+ env_use_select_io = HDgetenv("HDF5_USE_SELECTION_IO");
+ if (NULL != env_use_select_io && HDstrcmp(env_use_select_io, "")
+ && HDstrcmp(env_use_select_io, "0") && HDstrcmp(env_use_select_io, "no")
+ && HDstrcmp(env_use_select_io, "No") && HDstrcmp(env_use_select_io, "NO")
+ && HDstrcmp(env_use_select_io, "false") && HDstrcmp(env_use_select_io, "False")
+ && HDstrcmp(env_use_select_io, "FALSE"))
+ H5_use_selection_io_g = TRUE;
+
/* Debugging? */
H5__debug_mask("-all");
H5__debug_mask(HDgetenv("HDF5_DEBUG"));
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 5447233..dcc3baa 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -254,7 +254,7 @@ typedef struct H5D_chunk_coll_info_t {
/* Chunked layout operation callbacks */
static herr_t H5D__chunk_construct(H5F_t *f, H5D_t *dset);
static herr_t H5D__chunk_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id);
-static herr_t H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
+static herr_t H5D__chunk_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space,
H5D_chunk_map_t *fm);
static herr_t H5D__chunk_io_init_selections(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
@@ -1057,7 +1057,7 @@ H5D__chunk_is_data_cached(const H5D_shared_t *shared_dset)
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
+H5D__chunk_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *fm)
{
const H5D_t *dataset = io_info->dset; /* Local pointer to dataset info */
@@ -1065,6 +1065,7 @@ H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
htri_t file_space_normalized = FALSE; /* File dataspace was normalized */
unsigned f_ndims; /* The number of dimensions of the file's dataspace */
int sm_ndims; /* The number of dimensions of the memory buffer's dataspace (signed) */
+ htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
unsigned u; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
@@ -1120,6 +1121,11 @@ H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
if (H5D__chunk_io_init_selections(io_info, type_info, fm) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create file and memory chunk selections")
+ /* Check if we're performing selection I/O and save the result */
+ if ((use_selection_io = H5D__chunk_may_use_select_io(io_info)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
+ io_info->use_select_io = (hbool_t)use_selection_io;
+
done:
/* Reset the global dataspace info */
fm->file_space = NULL;
@@ -2535,7 +2541,6 @@ H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_
H5S_t * chunk_file_spaces_static[8]; /* Static buffer for chunk_file_spaces */
haddr_t * chunk_addrs = NULL; /* Array of chunk addresses */
haddr_t chunk_addrs_static[8]; /* Static buffer for chunk_addrs */
- htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
herr_t ret_value = SUCCEED; /*return value */
FUNC_ENTER_STATIC
@@ -2567,12 +2572,8 @@ H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_
skip_missing_chunks = TRUE;
}
- /* Check if we're performing selection I/O */
- if ((use_selection_io = H5D__chunk_may_use_select_io(io_info)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
-
/* Different blocks depending on whether we're using selection I/O */
- if (use_selection_io) {
+ if (io_info->use_select_io) {
size_t num_chunks;
size_t element_sizes[2] = {type_info->dst_type_size, 0};
void * bufs[2] = {io_info->u.rbuf, NULL};
@@ -2815,7 +2816,6 @@ H5D__chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize
H5S_t * chunk_file_spaces_static[8]; /* Static buffer for chunk_file_spaces */
haddr_t * chunk_addrs = NULL; /* Array of chunk addresses */
haddr_t chunk_addrs_static[8]; /* Static buffer for chunk_addrs */
- htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_STATIC
@@ -2843,12 +2843,8 @@ H5D__chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize
/* Initialize temporary compact storage info */
cpt_store.compact.dirty = &cpt_dirty;
- /* Check if we're performing selection I/O */
- if ((use_selection_io = H5D__chunk_may_use_select_io(io_info)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
-
/* Different blocks depending on whether we're using selection I/O */
- if (use_selection_io) {
+ if (io_info->use_select_io) {
size_t num_chunks;
size_t element_sizes[2] = {type_info->dst_type_size, 0};
const void *bufs[2] = {io_info->u.wbuf, NULL};
diff --git a/src/H5Dcompact.c b/src/H5Dcompact.c
index fe41298..9a6d4b2 100644
--- a/src/H5Dcompact.c
+++ b/src/H5Dcompact.c
@@ -54,7 +54,7 @@
/* Layout operation callbacks */
static herr_t H5D__compact_construct(H5F_t *f, H5D_t *dset);
static hbool_t H5D__compact_is_space_alloc(const H5O_storage_t *storage);
-static herr_t H5D__compact_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
+static herr_t H5D__compact_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space,
H5D_chunk_map_t *cm);
static ssize_t H5D__compact_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq,
@@ -227,7 +227,7 @@ H5D__compact_is_space_alloc(const H5O_storage_t H5_ATTR_UNUSED *storage)
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__compact_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
+H5D__compact_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space,
const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm)
{
diff --git a/src/H5Dcontig.c b/src/H5Dcontig.c
index 3b104b8..d2a84da 100644
--- a/src/H5Dcontig.c
+++ b/src/H5Dcontig.c
@@ -91,7 +91,7 @@ typedef struct H5D_contig_writevv_ud_t {
/* Layout operation callbacks */
static herr_t H5D__contig_construct(H5F_t *f, H5D_t *dset);
static herr_t H5D__contig_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id);
-static herr_t H5D__contig_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
+static herr_t H5D__contig_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space,
H5D_chunk_map_t *cm);
static ssize_t H5D__contig_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq,
@@ -551,16 +551,25 @@ H5D__contig_is_data_cached(const H5D_shared_t *shared_dset)
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__contig_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
+H5D__contig_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space,
const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm)
{
- FUNC_ENTER_STATIC_NOERR
+ htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
+ htri_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
io_info->store->contig.dset_addr = io_info->dset->shared->layout.storage.u.contig.addr;
io_info->store->contig.dset_size = io_info->dset->shared->layout.storage.u.contig.size;
- FUNC_LEAVE_NOAPI(SUCCEED)
+ /* Check if we're performing selection I/O */
+ if ((use_selection_io = H5D__contig_may_use_select_io(io_info, H5D_IO_OP_READ)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
+ io_info->use_select_io = (hbool_t)use_selection_io;
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D__contig_io_init() */
/*-------------------------------------------------------------------------
@@ -632,7 +641,6 @@ herr_t
H5D__contig_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *fm)
{
- htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_PACKAGE
@@ -644,11 +652,7 @@ H5D__contig_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize
HDassert(mem_space);
HDassert(file_space);
- /* Check if we're performing selection I/O */
- if ((use_selection_io = H5D__contig_may_use_select_io(io_info, H5D_IO_OP_READ)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
-
- if (use_selection_io) {
+ if (io_info->use_select_io) {
size_t dst_type_size = type_info->dst_type_size;
/* Issue selection I/O call (we can skip the page buffer because we've
@@ -684,7 +688,6 @@ herr_t
H5D__contig_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *fm)
{
- htri_t use_selection_io = FALSE; /* Whether to use selection I/O */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_PACKAGE
@@ -696,11 +699,7 @@ H5D__contig_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsiz
HDassert(mem_space);
HDassert(file_space);
- /* Check if we're performing selection I/O */
- if ((use_selection_io = H5D__contig_may_use_select_io(io_info, H5D_IO_OP_WRITE)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible")
-
- if (use_selection_io) {
+ if (io_info->use_select_io) {
size_t dst_type_size = type_info->dst_type_size;
/* Issue selection I/O call (we can skip the page buffer because we've
diff --git a/src/H5Defl.c b/src/H5Defl.c
index 85c9dba..fea70b0 100644
--- a/src/H5Defl.c
+++ b/src/H5Defl.c
@@ -61,7 +61,7 @@ typedef struct H5D_efl_writevv_ud_t {
/* Layout operation callbacks */
static herr_t H5D__efl_construct(H5F_t *f, H5D_t *dset);
-static herr_t H5D__efl_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
+static herr_t H5D__efl_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts,
const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *cm);
static ssize_t H5D__efl_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq,
size_t dset_len_arr[], hsize_t dset_offset_arr[], size_t mem_max_nseq,
@@ -197,7 +197,7 @@ H5D__efl_is_space_alloc(const H5O_storage_t H5_ATTR_UNUSED *storage)
*-------------------------------------------------------------------------
*/
static herr_t
-H5D__efl_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
+H5D__efl_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info,
hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space,
const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm)
{
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 03400cf..cc5f5bb 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -622,6 +622,10 @@ H5D__ioinfo_init(H5D_t *dset, const H5D_type_info_t *type_info, H5D_storage_t *s
io_info->io_ops.single_write = H5D__scatgath_write;
} /* end else */
+ /* Start with selection I/O off, layout callback will turn it on if
+ * appropriate */
+ io_info->use_select_io = FALSE;
+
#ifdef H5_HAVE_PARALLEL
/* Determine if the file was opened with an MPI VFD */
io_info->using_mpi_vfd = H5F_HAS_FEATURE(dset->oloc.file, H5FD_FEAT_HAS_MPI);
@@ -841,8 +845,11 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, const H5S_t *file_
H5CX_set_mpio_actual_io_mode(H5D_MPIO_NO_COLLECTIVE);
} /* end if */
- /* Make any parallel I/O adjustments */
- if (io_info->using_mpi_vfd) {
+ /* Make any parallel I/O adjustments. Do not use collective code path if
+ * we're using selection I/O - in this case the file driver will handle it.
+ */
+ /* Check for selection/vector support in file driver? -NAF */
+ if (io_info->using_mpi_vfd /*&& !H5_use_selection_io_g*/) {
H5FD_mpio_xfer_t xfer_mode; /* Parallel transfer for this request */
htri_t opt; /* Flag whether a selection is optimizable */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index de04ac9..f9014a7 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -121,7 +121,7 @@ typedef herr_t (*H5D_layout_construct_func_t)(H5F_t *f, H5D_t *dset);
typedef herr_t (*H5D_layout_init_func_t)(H5F_t *f, const H5D_t *dset, hid_t dapl_id);
typedef hbool_t (*H5D_layout_is_space_alloc_func_t)(const H5O_storage_t *storage);
typedef hbool_t (*H5D_layout_is_data_cached_func_t)(const H5D_shared_t *shared_dset);
-typedef herr_t (*H5D_layout_io_init_func_t)(const struct H5D_io_info_t *io_info,
+typedef herr_t (*H5D_layout_io_init_func_t)(struct H5D_io_info_t *io_info,
const H5D_type_info_t *type_info, hsize_t nelmts,
const H5S_t *file_space, const H5S_t *mem_space,
struct H5D_chunk_map_t *cm);
@@ -223,6 +223,7 @@ typedef struct H5D_io_info_t {
H5D_layout_ops_t layout_ops; /* Dataset layout I/O operation function pointers */
H5D_io_ops_t io_ops; /* I/O operation function pointers */
H5D_io_op_type_t op_type;
+ hbool_t use_select_io; /* Whether to use selection I/O */
union {
void * rbuf; /* Pointer to buffer for read */
const void *wbuf; /* Pointer to buffer to write */
diff --git a/src/H5FDint.c b/src/H5FDint.c
index bdb7c68..f558b59 100644
--- a/src/H5FDint.c
+++ b/src/H5FDint.c
@@ -884,15 +884,15 @@ H5FD__read_selection_translate(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uin
void *tmp_ptr;
/* Reallocate arrays */
- if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * sizeof(*addrs) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for address list")
addrs = tmp_ptr;
- if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * sizeof(*sizes) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for size list")
sizes = tmp_ptr;
- if (NULL == (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * sizeof(*vec_bufs) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for buffer list")
vec_bufs = tmp_ptr;
@@ -1503,15 +1503,15 @@ H5FD__write_selection_translate(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, ui
void *tmp_ptr;
/* Reallocate arrays */
- if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * sizeof(*addrs) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for address list")
addrs = tmp_ptr;
- if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * sizeof(*sizes) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for size list")
sizes = tmp_ptr;
- if (NULL == (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * 2)))
+ if (NULL == (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * sizeof(*vec_bufs) * 2)))
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"memory reallocation failed for buffer list")
vec_bufs = tmp_ptr;
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 1db700a..9a99b45 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -1281,7 +1281,7 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU
#endif
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0)
- /* Get the type's size */
+ /* Get the type's size */
#if MPI_VERSION >= 3
if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
#else
@@ -1592,6 +1592,17 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */
H5FD_mpio_collective_opt_t coll_opt_mode; /* whether we are doing collective or independent I/O */
int size_i;
+#if MPI_VERSION >= 3
+ MPI_Count bytes_read = 0; /* Number of bytes read in */
+ MPI_Count type_size; /* MPI datatype used for I/O's size */
+ MPI_Count io_size; /* Actual number of bytes requested */
+ MPI_Count n;
+#else
+ int bytes_read = 0; /* Number of bytes read in */
+ int type_size; /* MPI datatype used for I/O's size */
+ int io_size; /* Actual number of bytes requested */
+ int n;
+#endif
herr_t ret_value = SUCCEED;
FUNC_ENTER_STATIC
@@ -1615,19 +1626,6 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
HDassert((count == 0) || (sizes[0] != 0));
HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST));
- /* sort the vector I/O request into increasing address order if required
- *
- * If the vector is already sorted, the base addresses of types, addrs, sizes,
- * and bufs will be returned in s_types, s_addrs, s_sizes, and s_bufs respectively.
- *
- * If the vector was not already sorted, new, sorted versions of types, addrs, sizes, and bufs
- * are allocated, populated, and returned in s_types, s_addrs, s_sizes, and s_bufs respectively.
- * In this case, this function must free the memory allocated for the sorted vectors.
- */
- if (H5FD_sort_vector_io_req(&vector_was_sorted, count, types, addrs, sizes, bufs, &s_types, &s_addrs,
- &s_sizes, &s_bufs) < 0)
- HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "can't sort vector I/O request")
-
/* Get the transfer mode from the API context
*
* This flag is set to H5FD_MPIO_COLLECTIVE if the API call is
@@ -1643,6 +1641,19 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
if (count > 0) { /* create MPI derived types describing the vector read */
+ /* sort the vector I/O request into increasing address order if required
+ *
+ * If the vector is already sorted, the base addresses of types, addrs, sizes,
+ * and bufs will be returned in s_types, s_addrs, s_sizes, and s_bufs respectively.
+ *
+ * If the vector was not already sorted, new, sorted versions of types, addrs, sizes, and bufs
+ * are allocated, populated, and returned in s_types, s_addrs, s_sizes, and s_bufs respectively.
+ * In this case, this function must free the memory allocated for the sorted vectors.
+ */
+ if (H5FD_sort_vector_io_req(&vector_was_sorted, count, types, addrs, sizes, bufs, &s_types, &s_addrs,
+ &s_sizes, &s_bufs) < 0)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "can't sort vector I/O request")
+
if ((NULL == (mpi_block_lengths = (int *)HDmalloc((size_t)count * sizeof(int)))) ||
(NULL == (mpi_displacments = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint)))) ||
(NULL == (mpi_bufs = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint))))) {
@@ -1810,9 +1821,60 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
+
+ /* How many bytes were actually read? */
+#if MPI_VERSION >= 3
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
+#else
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+#endif
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+
+ /* Get the type's size */
+#if MPI_VERSION >= 3
+ if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size)))
+#else
+ if (MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
+#endif
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
+
+ /* Compute the actual number of bytes requested */
+ io_size = type_size * size_i;
+
+ /* Check for read failure */
+ if (bytes_read < 0 || bytes_read > io_size)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
+
+ /* Check for incomplete read */
+ n = io_size - bytes_read;
+ if (n > 0) {
+ i = (int)count - 1;
+
+ /* Iterate over sorted array in reverse, filling in zeroes to
+ * sections of the buffers that were not read to */
+ do {
+ HDassert(i >= 0);
+
+#if MPI_VERSION >= 3
+ io_size = MIN(n, (MPI_Count)s_sizes[i]);
+ bytes_read = (MPI_Count)s_sizes[i] - io_size;
+#else
+ io_size = MIN(n, (int)s_sizes[i]);
+ bytes_read = (int)s_sizes[i] - io_size;
+#endif
+ HDassert(bytes_read >= 0);
+
+ HDmemset((char *)bufs[i] + bytes_read, 0, (size_t)io_size);
+
+ n -= io_size;
+ i--;
+ } while (n > 0);
+ }
}
else if (count > 0) {
+ haddr_t max_addr = HADDR_MAX;
+
/* The read is part of an independent operation. As a result,
* we can't use MPI_File_set_view() (since it is a collective operation),
* and thus there is no point in setting up an MPI derived type, as
@@ -1832,7 +1894,7 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
for (i = 0; i < (int)count; i++) {
- if (H5FD_mpi_haddr_to_MPIOff(s_addrs[i], &mpi_off) < 0)
+ if (H5FD_mpi_haddr_to_MPIOff(addrs[i], &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
@@ -1845,16 +1907,46 @@ H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t cou
}
else {
- size = s_sizes[i];
+ size = sizes[i];
}
}
size_i = (int)size; /* todo: fix potential for overflow */
- if (MPI_SUCCESS !=
- (mpi_code = MPI_File_read_at(file->f, mpi_off, s_bufs[i], size_i, MPI_BYTE, &mpi_stat)))
+ /* Check if we actually need to do I/O */
+ if (addrs[i] < max_addr) {
+ /* Issue read */
+ if (MPI_SUCCESS !=
+ (mpi_code = MPI_File_read_at(file->f, mpi_off, bufs[i], size_i, MPI_BYTE, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+
+ /* How many bytes were actually read? */
+#if MPI_VERSION >= 3
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, MPI_BYTE, &bytes_read)))
+#else
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+#endif
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+
+ /* Check for read failure */
+ if (bytes_read < 0 || bytes_read > size_i)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
+
+ /*
+ * If we didn't read the entire I/O, fill in zeroes beyond end of
+ * the physical MPI file and don't issue any more reads at higher
+ * addresses.
+ */
+ if ((n = (size_i - bytes_read)) > 0) {
+ HDmemset((char *)bufs[i] + bytes_read, 0, (size_t)n);
+ max_addr = addrs[i] + (haddr_t)bytes_read;
+ }
+ }
+ else {
+ /* Read is past the max address, fill in zeroes */
+ HDmemset((char *)bufs[i], 0, size);
+ }
}
}