summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@lbl.gov>2020-08-20 19:31:35 (GMT)
committerQuincey Koziol <koziol@lbl.gov>2020-08-21 21:18:17 (GMT)
commite3fb9cdfb6d762af74513be8cceee98c03560b24 (patch)
tree39b4cb977ae7f657dba74bf9a51064940cc9a72c
parentbebf0a9fd63f060b9579c6877ad16deb5bc5dc2f (diff)
downloadhdf5-e3fb9cdfb6d762af74513be8cceee98c03560b24.zip
hdf5-e3fb9cdfb6d762af74513be8cceee98c03560b24.tar.gz
hdf5-e3fb9cdfb6d762af74513be8cceee98c03560b24.tar.bz2
Avoid creating MPI datatypes on ranks with 0 chunks to write'
-rw-r--r--src/H5Dchunk.c121
1 files changed, 68 insertions, 53 deletions
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index ee83564..f4c1c8a 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -4961,6 +4961,7 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
H5FD_mpio_xfer_t prev_xfer_mode; /* Previous data xfer mode */
hbool_t have_xfer_mode = FALSE; /* Whether the previous xffer mode has been retrieved */
hbool_t need_addr_sort = FALSE;
+ hbool_t created_mpi_datatypes = FALSE; /* Whether MPI datatypes were created */
int i; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
@@ -4983,9 +4984,9 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "Resulted in division by zero")
num_blocks = (size_t)(chunk_info->num_io / (size_t)mpi_size); /* value should be the same on all procs */
- /* after evenly distributing the blocks between processes, are
- there any leftover blocks for each individual process
- (round-robin) */
+ /* After evenly distributing the blocks between processes, are there any
+ * leftover blocks for each individual process (round-robin)?
+ */
leftover_blocks = (size_t)(chunk_info->num_io % (size_t)mpi_size);
/* Cast values to types needed by MPI */
@@ -4993,58 +4994,70 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
H5_CHECKED_ASSIGN(leftover, int, leftover_blocks, size_t);
H5_CHECKED_ASSIGN(block_len, int, chunk_size, size_t);
- /* Allocate buffers */
- /* (MSC - should not need block_lens if MPI_type_create_hindexed_block is working) */
- if(NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int))))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer")
- if(NULL == (chunk_disp_array = (MPI_Aint *)H5MM_malloc((size_t)(blocks + 1) * sizeof(MPI_Aint))))
- HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file displacement buffer")
+ /* Check if we have any chunks to write on this rank */
+ if(num_blocks > 0 || (leftover && leftover > mpi_rank)) {
+ /* Allocate buffers */
+ /* (MSC - should not need block_lens if MPI_type_create_hindexed_block is working) */
+ if(NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer")
+ if(NULL == (chunk_disp_array = (MPI_Aint *)H5MM_malloc((size_t)(blocks + 1) * sizeof(MPI_Aint))))
+ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file displacement buffer")
- for(i = 0 ; i < blocks ; i++) {
- /* store the chunk address as an MPI_Aint */
- chunk_disp_array[i] = (MPI_Aint)(chunk_info->addr[i + mpi_rank*blocks]);
+ for(i = 0 ; i < blocks ; i++) {
+ /* store the chunk address as an MPI_Aint */
+ chunk_disp_array[i] = (MPI_Aint)(chunk_info->addr[i + (mpi_rank * blocks)]);
- /* MSC - should not need this if MPI_type_create_hindexed_block is working */
- block_lens[i] = block_len;
+ /* MSC - should not need this if MPI_type_create_hindexed_block is working */
+ block_lens[i] = block_len;
- /* make sure that the addresses in the datatype are
- monotonically non decreasing */
- if(i && (chunk_disp_array[i] < chunk_disp_array[i - 1]))
- need_addr_sort = TRUE;
- } /* end for */
+ /* Make sure that the addresses in the datatype are
+ * monotonically non-decreasing
+ */
+ if(i && (chunk_disp_array[i] < chunk_disp_array[i - 1]))
+ need_addr_sort = TRUE;
+ } /* end for */
- /* calculate if there are any leftover blocks after evenly
- distributing. If there are, then round robin the distribution
- to processes 0 -> leftover. */
- if(leftover && leftover > mpi_rank) {
- chunk_disp_array[blocks] = (MPI_Aint)chunk_info->addr[blocks*mpi_size + mpi_rank];
- if(blocks && (chunk_disp_array[blocks] < chunk_disp_array[blocks - 1]))
- need_addr_sort = TRUE;
- block_lens[blocks] = block_len;
- blocks++;
- }
+ /* Calculate if there are any leftover blocks after evenly
+ * distributing. If there are, then round-robin the distribution
+ * to processes 0 -> leftover.
+ */
+ if(leftover && leftover > mpi_rank) {
+ chunk_disp_array[blocks] = (MPI_Aint)chunk_info->addr[(blocks * mpi_size) + mpi_rank];
+ if(blocks && (chunk_disp_array[blocks] < chunk_disp_array[blocks - 1]))
+ need_addr_sort = TRUE;
+ block_lens[blocks] = block_len;
+ blocks++;
+ }
- /*
- * Ensure that the blocks are sorted in monotonically non-decreasing
- * order of offset in the file.
- */
- if(need_addr_sort)
- HDqsort(chunk_disp_array, blocks, sizeof(MPI_Aint), H5D__chunk_cmp_addr);
+ /* Ensure that the blocks are sorted in monotonically non-decreasing
+ * order of offset in the file.
+ */
+ if(need_addr_sort)
+ HDqsort(chunk_disp_array, blocks, sizeof(MPI_Aint), H5D__chunk_cmp_addr);
- /* MSC - should use this if MPI_type_create_hindexed block is working:
- * mpi_code = MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE, &file_type);
- */
- mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type);
- if(mpi_code != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
- if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-
- mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type);
- if(mpi_code != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
- if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+ /* MSC - should use this if MPI_type_create_hindexed block is working:
+ * mpi_code = MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE, &file_type);
+ */
+ mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type);
+ if(mpi_code != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+ mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type);
+ if(mpi_code != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+ /* Indicate that the MPI types were created */
+ created_mpi_datatypes = TRUE;
+ } /* end if */
+ else {
+ /* Set up file & memory MPI types, to participate in collective write */
+ file_type = MPI_BYTE;
+ mem_type = MPI_BYTE;
+ } /* end else */
/* Set MPI-IO VFD properties */
@@ -5076,10 +5089,12 @@ done:
HDONE_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set transfer mode")
/* free things */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
- if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
- HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+ if(created_mpi_datatypes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
+ HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
+ HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+ } /* end if */
H5MM_xfree(chunk_disp_array);
H5MM_xfree(block_lens);