diff options
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- | src/H5FDmpio.c | 43 |
1 files changed, 37 insertions, 6 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index d160858..3ab90aa 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -1354,6 +1354,7 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, int n; #endif hbool_t use_view_this_time = FALSE; + hbool_t rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */ herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -1437,8 +1438,25 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, if(H5FD_mpio_Debug[(int)'r']) HDfprintf(stdout, "%s: doing MPI collective IO\n", FUNC); #endif - if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) + /* Check whether we should read from rank 0 and broadcast to other ranks */ + if(H5CX_get_mpio_rank0_bcast()) { +#ifdef H5FDmpio_DEBUG + if(H5FD_mpio_Debug[(int)'r']) + HDfprintf(stdout, "%s: doing read-rank0-and-MPI_Bcast\n", FUNC); +#endif + /* Indicate path we've taken */ + rank0_bcast = TRUE; + + /* Read on rank 0 Bcast to other ranks */ + if(file->mpi_rank == 0) + if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) + if(MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + } /* end if */ + else + if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) } /* end if */ else { #ifdef H5FDmpio_DEBUG @@ -1460,13 +1478,26 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) - /* How many bytes were actually read? */ + /* Only retrieve bytes read if this rank _actually_ participated in I/O */ + if(!rank0_bcast || (rank0_bcast && file->mpi_rank == 0) ) { + /* How many bytes were actually read? */ #if MPI_VERSION >= 3 - if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read))) + if(MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read))) #else - if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) + if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) #endif - HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) + } /* end if */ + + /* If the rank0-bcast feature was used, broadcast the # of bytes read to + * other ranks, which didn't perform any I/O. + */ + /* NOTE: This could be optimized further to be combined with the broadcast + * of the data. (QAK - 2019/1/2) + */ + if(rank0_bcast) + if(MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_LONG_LONG, 0, file->comm)) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0) /* Get the type's size */ #if MPI_VERSION >= 3 |