summary | refs | log | tree | commit | diff | stats
path: root/src/H5FDmpio.c
diff options
context:
space:
mode:
author: jhendersonHDF <jhenderson@hdfgroup.org> 2022-03-25 21:31:06 (GMT)
committer: GitHub <noreply@github.com> 2022-03-25 21:31:06 (GMT)
commit: 15971fbd160ab061725f238379cc638ed37f05ef (patch)
tree: 1be26bd1be27c67ba674ddc39611d68959c3c207 /src/H5FDmpio.c
parent: f73b4c618cb680f8be9b2f2510c34442ec24d713 (diff)
download: hdf5-15971fbd160ab061725f238379cc638ed37f05ef.zip
hdf5-15971fbd160ab061725f238379cc638ed37f05ef.tar.gz
hdf5-15971fbd160ab061725f238379cc638ed37f05ef.tar.bz2
Hdf5 1 12 merges (#1528)
* Use internal version of H5Eprint2 to avoid possible stack overflow (#661)
* Add support for parallel filters to h5repack (#832)
* Allow parallel filters feature for comm size of 1 (#840)
* Avoid popping API context when one wasn't pushed (#848)
* Fix several warnings (#720)
* Don't allow H5Pset(get)_all_coll_metadata_ops for DXPLs (#1201)
* Fix free list tracking and cleanup cast alignment warnings (#1288)
* Fix free list tracking and cleanup cast alignment warnings
* Add free list tracking code to H5FL 'arr' routines
* Fix usage of several HDfprintf format specifiers after HDfprintf removal (#1324)
* Use appropriate printf format specifiers for haddr_t and hsize_t types directly (#1340)
* Fix H5ACmpio dirty bytes creation debugging (#1357)
* Fix documentation for H5D_space_status_t enum values (#1372)
* Parallel rank0 deadlock fixes (#1183)
* Fix several places where rank 0 can skip past collective MPI operations on failure
* Committing clang-format changes
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
* Fix a few issues noted by LGTM (#1421)
* Fix cache sanity checking code by moving functions to wider scope (#1435)
* Fix metadata cache bug when resizing a pinned/protected entry (v2) (#1463)
* Disable memory alloc sanity checks by default for Autotools debug builds (#1468)
* Committing clang-format changes
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- src/H5FDmpio.c 45
1 files changed, 33 insertions, 12 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index c0b14ce..fe737c6 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -860,14 +860,19 @@ H5FD__mpio_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t H5_ATTR
file->mpi_size = mpi_size;
/* Only processor p0 will get the filesize and broadcast it. */
- if (mpi_rank == 0)
+ if (mpi_rank == 0) {
+ /* If MPI_File_get_size fails, broadcast file size as -1 to signal error */
if (MPI_SUCCESS != (mpi_code = MPI_File_get_size(fh, &file_size)))
- HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code)
+ file_size = (MPI_Offset)-1;
+ }
/* Broadcast file size */
if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&file_size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, comm)))
HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+ if (file_size < 0)
+ HMPI_GOTO_ERROR(NULL, "MPI_File_get_size failed", mpi_code)
+
/* Determine if the file should be truncated */
if (file_size && (flags & H5F_ACC_TRUNC)) {
/* Truncate the file */
@@ -1264,10 +1269,14 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU
rank0_bcast = TRUE;
/* Read on rank 0 Bcast to other ranks */
- if (file->mpi_rank == 0)
+ if (file->mpi_rank == 0) {
+ /* If MPI_File_read_at fails, push an error, but continue
+ * to participate in following MPI_Bcast */
if (MPI_SUCCESS !=
(mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ HMPI_DONE_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ }
+
if (MPI_SUCCESS != (mpi_code = MPI_Bcast(buf, size_i, buf_type, 0, file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
} /* end if */
@@ -1311,11 +1320,21 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU
if (!rank0_bcast || (rank0_bcast && file->mpi_rank == 0)) {
/* How many bytes were actually read? */
#if MPI_VERSION >= 3
- if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read)))
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read))) {
#else
- if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
+ if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) {
#endif
- HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ if (rank0_bcast && file->mpi_rank == 0) {
+ /* If MPI_Get_elements(_x) fails for a rank 0 bcast strategy,
+ * push an error, but continue to participate in the following
+ * MPI_Bcast.
+ */
+ bytes_read = -1;
+ HMPI_DONE_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ }
+ else
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+ }
} /* end if */
/* If the rank0-bcast feature was used, broadcast the # of bytes read to
@@ -1695,17 +1714,19 @@ H5FD__mpio_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5_ATTR
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
/* Only processor p0 will get the filesize and broadcast it. */
- /* (Note that throwing an error here will cause non-rank 0 processes
- * to hang in following Bcast. -QAK, 3/17/2018)
- */
- if (0 == file->mpi_rank)
+ if (0 == file->mpi_rank) {
+ /* If MPI_File_get_size fails, broadcast file size as -1 to signal error */
if (MPI_SUCCESS != (mpi_code = MPI_File_get_size(file->f, &size)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_get_size failed", mpi_code)
+ size = (MPI_Offset)-1;
+ }
/* Broadcast file size */
if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&size, (int)sizeof(MPI_Offset), MPI_BYTE, 0, file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ if (size < 0)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_get_size failed", mpi_code)
+
if (H5FD_mpi_haddr_to_MPIOff(file->eoa, &needed_eof) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "cannot convert from haddr_t to MPI_Offset")