summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorJordan Henderson <jhenderson@hdfgroup.org>2018-07-16 14:37:54 (GMT)
committerJordan Henderson <jhenderson@hdfgroup.org>2018-07-16 14:37:54 (GMT)
commitf649be9fdc9add7a12aa5c8290b9bf8a45d49a56 (patch)
tree15953e85d2e4e79203c80ae12e264f1702c2ab32 /src
parent518f4af90058d44fa5557d1a3509afc947ec80d7 (diff)
parenta8d6f100cdddbfc42d0c4abfeb4ceb8788b1b087 (diff)
downloadhdf5-f649be9fdc9add7a12aa5c8290b9bf8a45d49a56.zip
hdf5-f649be9fdc9add7a12aa5c8290b9bf8a45d49a56.tar.gz
hdf5-f649be9fdc9add7a12aa5c8290b9bf8a45d49a56.tar.bz2
Merge pull request #1127 in HDFFV/hdf5 from ~JHENDERSON/hdf5:develop to develop
* commit 'a8d6f100cdddbfc42d0c4abfeb4ceb8788b1b087': Add note about single chunk caching and serial library Add check for actually using the MPI file driver when caching one chunk Fix error message mentioning wrong MPI function used Fix for HDFFV-10509 Revise H5D__mpio_array_gatherv() to not allocate memory needlessly Add test to continually grow and shrink chunks Changes to test with checksum filter as well as deflate filter Eliminate warning about signed to unsigned conversion Remove unused local variable Fix bug in parallel reads of compressed data Add data verification to parallel filtered compound write tests Add seven of fourteen parallel filtered data partial read tests
Diffstat (limited to 'src')
-rw-r--r--src/H5Dchunk.c37
-rw-r--r--src/H5Dmpio.c149
2 files changed, 133 insertions, 53 deletions
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index e3f6410..e64a60f 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -2933,8 +2933,41 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled,
H5F_set_coll_md_read(idx_info.f, temp_cmr);
#endif /* H5_HAVE_PARALLEL */
- /* Cache the information retrieved */
- H5D__chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata);
+ /*
+ * Cache the information retrieved.
+ *
+ * Note that if we are writing to the dataset in parallel and filters
+ * are involved, we skip caching this information as it is highly likely
+ * that the chunk information will be invalidated as a result of the
+ * filter operation (e.g. the chunk gets re-allocated to a different
+ * address in the file and/or gets re-allocated with a different size).
+ * If we were to cache this information, subsequent reads/writes would
+ * retrieve the invalid information and cause a variety of issues.
+ *
+ * It has been verified that in the serial library, when writing to chunks
+ * with the real chunk cache disabled and with filters involved, the
+ * functions within this file are correctly called in such a manner that
+ * this single chunk cache is always updated correctly. Therefore, this
+ * check is not needed for the serial library.
+ *
+ * This is an ugly and potentially frail check, but the
+ * H5D__chunk_cinfo_cache_reset() function is not currently available
+ * to functions outside of this file, so outside functions can not
+ * invalidate this single chunk cache. Even if the function were available,
+ * this check prevents us from doing the work of going through and caching
+ * each chunk in the write operation, when we're only going to invalidate
+ * the cache at the end of a parallel write anyway.
+ *
+ * - JTH (7/13/2018)
+ */
+#ifdef H5_HAVE_PARALLEL
+ if ( !( (H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI))
+ && (H5F_INTENT(dset->oloc.file) & H5F_ACC_RDWR)
+ && dset->shared->dcpl_cache.pline.nused
+ )
+ )
+#endif
+ H5D__chunk_cinfo_cache_update(&dset->shared->cache.chunk.last, udata);
} /* end if */
} /* end else */
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index bc37840..d721ae6 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -234,7 +234,7 @@ static herr_t H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info
H5D_filtered_collective_io_info_t *local_chunk_array, size_t *local_chunk_array_num_entries);
static herr_t H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
size_t array_entry_size, void **gathered_array, size_t *gathered_array_num_entries,
- int nprocs, hbool_t allgather, int root, MPI_Comm comm, int (*sort_func)(const void *, const void *));
+ hbool_t allgather, int root, MPI_Comm comm, int (*sort_func)(const void *, const void *));
static herr_t H5D__mpio_filtered_collective_write_type(
H5D_filtered_collective_io_info_t *chunk_list, size_t num_entries,
MPI_Datatype *new_mem_type, hbool_t *mem_type_derived,
@@ -418,19 +418,16 @@ done:
* Function: H5D__mpio_array_gatherv
*
* Purpose: Given an array, specified in local_array, by each processor
- * calling this function, gathers each array into a single
+ * calling this function, collects each array into a single
* array which is then either gathered to the processor
* specified by root, when allgather is false, or is
* distributed back to all processors when allgather is true.
*
- * The size of each entry and number of entries in the array
- * contributed by an individual processor should be specified
- * in array_entry_size and local_array_num_entries,
+ * The number of entries in the array contributed by an
+ * individual processor and the size of each entry should be
+ * specified in local_array_num_entries and array_entry_size,
* respectively.
*
- * The number of processors participating in the gather
- * operation should be specified for nprocs.
- *
* The MPI communicator to use should be specified for comm.
*
* If the sort_func argument is supplied, the array is sorted
@@ -448,14 +445,13 @@ done:
static herr_t
H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
size_t array_entry_size, void **_gathered_array, size_t *_gathered_array_num_entries,
- int nprocs, hbool_t allgather, int root, MPI_Comm comm, int (*sort_func)(const void *, const void *))
+ hbool_t allgather, int root, MPI_Comm comm, int (*sort_func)(const void *, const void *))
{
size_t gathered_array_num_entries = 0; /* The size of the newly-constructed array */
- size_t i;
void *gathered_array = NULL; /* The newly-constructed array returned to the caller */
- int *receive_counts_array = NULL; /* Array containing number of entries each process is contributing */
- int *displacements_array = NULL; /* Array of displacements where each process places its data in the final array */
- int mpi_code;
+ int *receive_counts_array = NULL; /* Array containing number of entries each processor is contributing */
+ int *displacements_array = NULL; /* Array of displacements where each processor places its data in the final array */
+ int mpi_code, mpi_rank, mpi_size;
int sendcount;
herr_t ret_value = SUCCEED;
@@ -464,34 +460,62 @@ H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
HDassert(_gathered_array);
HDassert(_gathered_array_num_entries);
- /* Determine the size of the end result array */
+ MPI_Comm_size(comm, &mpi_size);
+ MPI_Comm_rank(comm, &mpi_rank);
+
+ /*
+ * Determine the size of the end result array by collecting the number
+ * of entries contributed by each processor into a single total.
+ */
if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_array_num_entries, &gathered_array_num_entries, 1, MPI_INT, MPI_SUM, comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
- /* If 0 entries resulted from the collective operation, no one is writing anything */
+ /* If 0 entries resulted from the collective operation, no processor is contributing anything and there is nothing to do */
if (gathered_array_num_entries > 0) {
- if (NULL == (gathered_array = H5MM_malloc(gathered_array_num_entries * array_entry_size)))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate gathered array")
+ /*
+ * If gathering to all processors, all processors need to allocate space for the resulting array, as well as
+ * the receive counts and displacements arrays for the collective MPI_Allgatherv call. Otherwise, only the
+ * root processor needs to allocate the space for an MPI_Gatherv call.
+ */
+ if (allgather || (mpi_rank == root)) {
+ if (NULL == (gathered_array = H5MM_malloc(gathered_array_num_entries * array_entry_size)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate gathered array")
- if (NULL == (receive_counts_array = (int *) H5MM_malloc((size_t) nprocs * sizeof(int))))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive counts array")
+ if (NULL == (receive_counts_array = (int *) H5MM_malloc((size_t) mpi_size * sizeof(int))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive counts array")
- if (NULL == (displacements_array = (int *) H5MM_malloc((size_t) nprocs * sizeof(int))))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive displacements array")
+ if (NULL == (displacements_array = (int *) H5MM_malloc((size_t) mpi_size * sizeof(int))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive displacements array")
+ } /* end if */
- /* Inform each process of how many entries each other process is contributing to the resulting array */
- if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&local_array_num_entries, 1, MPI_INT, receive_counts_array, 1, MPI_INT, comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
+ /*
+ * If gathering to all processors, inform each processor of how many entries each other processor is
+ * contributing to the resulting array by collecting the counts into each processor's "receive counts"
+ * array. Otherwise, inform only the root processor of how many entries each other processor is contributing.
+ */
+ if (allgather) {
+ if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&local_array_num_entries, 1, MPI_INT, receive_counts_array, 1, MPI_INT, comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
+ } /* end if */
+ else {
+ if (MPI_SUCCESS != (mpi_code = MPI_Gather(&local_array_num_entries, 1, MPI_INT, receive_counts_array, 1, MPI_INT, root, comm)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Gather failed", mpi_code)
+ } /* end else */
- /* Multiply each receive count by the size of the array entry, since the data is sent as bytes */
- for (i = 0; i < (size_t) nprocs; i++)
- H5_CHECKED_ASSIGN(receive_counts_array[i], int, (size_t) receive_counts_array[i] * array_entry_size, size_t);
+ if (allgather || (mpi_rank == root)) {
+ size_t i;
- /* Set receive buffer offsets for MPI_Allgatherv */
- displacements_array[0] = 0;
- for (i = 1; i < (size_t) nprocs; i++)
- displacements_array[i] = displacements_array[i - 1] + receive_counts_array[i - 1];
+ /* Multiply each receive count by the size of the array entry, since the data is sent as bytes. */
+ for (i = 0; i < (size_t) mpi_size; i++)
+ H5_CHECKED_ASSIGN(receive_counts_array[i], int, (size_t) receive_counts_array[i] * array_entry_size, size_t);
+ /* Set receive buffer offsets for the collective MPI_Allgatherv/MPI_Gatherv call. */
+ displacements_array[0] = 0;
+ for (i = 1; i < (size_t) mpi_size; i++)
+ displacements_array[i] = displacements_array[i - 1] + receive_counts_array[i - 1];
+ } /* end if */
+
+ /* As the data is sent as bytes, calculate the true sendcount for the data. */
H5_CHECKED_ASSIGN(sendcount, int, local_array_num_entries * array_entry_size, size_t);
if (allgather) {
@@ -502,10 +526,11 @@ H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
else {
if (MPI_SUCCESS != (mpi_code = MPI_Gatherv(local_array, sendcount, MPI_BYTE,
gathered_array, receive_counts_array, displacements_array, MPI_BYTE, root, comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Allgatherv failed", mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Gatherv failed", mpi_code)
} /* end else */
- if (sort_func) HDqsort(gathered_array, gathered_array_num_entries, array_entry_size, sort_func);
+ if (sort_func && (allgather || (mpi_rank == root)))
+ HDqsort(gathered_array, gathered_array_num_entries, array_entry_size, sort_func);
} /* end if */
*_gathered_array = gathered_array;
@@ -1295,8 +1320,7 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in
* of the chunks in the file.
*/
if (H5D__mpio_array_gatherv(chunk_list, chunk_list_num_entries, sizeof(H5D_filtered_collective_io_info_t),
- (void **) &collective_chunk_list, &collective_chunk_list_num_entries, mpi_size,
- true, 0, io_info->comm, NULL) < 0)
+ (void **) &collective_chunk_list, &collective_chunk_list_num_entries, true, 0, io_info->comm, NULL) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes")
/* Collectively re-allocate the modified chunks (from each process) in the file */
@@ -1768,8 +1792,7 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i
* of the chunks in the file
*/
if (H5D__mpio_array_gatherv(&chunk_list[i], have_chunk_to_process ? 1 : 0, sizeof(H5D_filtered_collective_io_info_t),
- (void **) &collective_chunk_list, &collective_chunk_list_num_entries, mpi_size,
- true, 0, io_info->comm, NULL) < 0)
+ (void **) &collective_chunk_list, &collective_chunk_list_num_entries, true, 0, io_info->comm, NULL) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes")
/* Participate in the collective re-allocation of all chunks modified
@@ -2655,8 +2678,8 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty
* call, the gathered list will initially be sorted in increasing order of chunk offset in the file.
*/
if (H5D__mpio_array_gatherv(local_chunk_array, *local_chunk_array_num_entries, sizeof(H5D_filtered_collective_io_info_t),
- (void **) &shared_chunks_info_array, &shared_chunks_info_array_num_entries, mpi_size,
- false, 0, io_info->comm, H5D__cmp_filtered_collective_io_info_entry) < 0)
+ (void **) &shared_chunks_info_array, &shared_chunks_info_array_num_entries, false, 0,
+ io_info->comm, H5D__cmp_filtered_collective_io_info_entry) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather array")
/* Rank 0 redistributes any shared chunks to new owners as necessary */
@@ -2981,7 +3004,7 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
{
H5D_chunk_info_t *chunk_info = NULL;
H5S_sel_iter_t *mem_iter = NULL; /* Memory iterator for H5D__scatter_mem/H5D__gather_mem */
- unsigned char *mod_data = NULL; /* Chunk modification data sent by a process to a chunk's owner */
+ H5S_sel_iter_t *file_iter = NULL;
H5Z_EDC_t err_detect; /* Error detection info */
H5Z_cb_t filter_cb; /* I/O filter callback function */
unsigned filter_mask = 0;
@@ -2989,11 +3012,13 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
hssize_t extent_npoints;
hsize_t true_chunk_size;
hbool_t mem_iter_init = FALSE;
+ hbool_t file_iter_init = FALSE;
size_t buf_size;
size_t i;
H5S_t *dataspace = NULL; /* Other process' dataspace for the chunk */
- void *tmp_gath_buf = NULL; /* Temporary gather buffer for owner of the chunk to gather into from
- application write buffer before scattering out to the chunk data buffer */
+ void *tmp_gath_buf = NULL; /* Temporary gather buffer to gather into from application buffer
+ before scattering out to the chunk data buffer (when writing data),
+ or vice versa (when reading data) */
int mpi_code;
herr_t ret_value = SUCCEED;
@@ -3073,9 +3098,6 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information")
mem_iter_init = TRUE;
- if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
-
/* If this is a read operation, scatter the read chunk data to the user's buffer.
*
* If this is a write operation, update the chunk data buffer with the modifications
@@ -3084,16 +3106,39 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
*/
switch (io_info->op_type) {
case H5D_IO_OP_READ:
- if (H5D__scatter_mem(chunk_entry->buf, chunk_info->mspace, mem_iter, (size_t)iter_nelmts, io_info->u.rbuf) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer")
+ if (NULL == (file_iter = (H5S_sel_iter_t *) H5MM_malloc(sizeof(H5S_sel_iter_t))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file iterator")
+
+ if (H5S_select_iter_init(file_iter, chunk_info->fspace, type_info->src_type_size) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information")
+ file_iter_init = TRUE;
+
+ if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
+
+ if (NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size)))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer")
+
+ if (!H5D__gather_mem(chunk_entry->buf, chunk_info->fspace, file_iter, (size_t) iter_nelmts, tmp_gath_buf))
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't gather from chunk buffer")
+
+ if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
+
+ if (H5D__scatter_mem(tmp_gath_buf, chunk_info->mspace, mem_iter, (size_t) iter_nelmts, io_info->u.rbuf) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer")
+
break;
case H5D_IO_OP_WRITE:
+ if ((iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace)) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
+
if (NULL == (tmp_gath_buf = H5MM_malloc((hsize_t) iter_nelmts * type_info->src_type_size)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer")
/* Gather modification data from the application write buffer into a temporary buffer */
- if(!H5D__gather_mem(io_info->u.wbuf, chunk_info->mspace, mem_iter, (size_t)iter_nelmts, tmp_gath_buf))
+ if(!H5D__gather_mem(io_info->u.wbuf, chunk_info->mspace, mem_iter, (size_t) iter_nelmts, tmp_gath_buf))
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "couldn't gather from write buffer")
if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
@@ -3111,7 +3156,7 @@ H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk
/* Scatter the owner's modification data into the chunk data buffer according to
* the file space.
*/
- if(H5D__scatter_mem(tmp_gath_buf, chunk_info->fspace, mem_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0)
+ if(H5D__scatter_mem(tmp_gath_buf, chunk_info->fspace, mem_iter, (size_t) iter_nelmts, chunk_entry->buf) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to chunk data buffer")
if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
@@ -3177,10 +3222,12 @@ done:
H5MM_free(chunk_entry->async_info.receive_buffer_array);
if (chunk_entry->async_info.receive_requests_array)
H5MM_free(chunk_entry->async_info.receive_requests_array);
- if (mod_data)
- H5MM_free(mod_data);
if (tmp_gath_buf)
H5MM_free(tmp_gath_buf);
+ if (file_iter_init && H5S_SELECT_ITER_RELEASE(file_iter) < 0)
+ HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
+ if (file_iter)
+ H5MM_free(file_iter);
if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
if (mem_iter)