From cb0a883f6d3ce976f8d485c8e44996d7264fce54 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Fri, 26 Mar 2021 15:14:09 -0500 Subject: Minor parallel improvements (#519) * Improve MPI error reporting, handled failed operations in parallel tests more nicely, and clean up MPI_Allreduce for determining whether to break collective I/O * Committing clang-format changes Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- src/H5Dmpio.c | 2 +- src/H5Eprivate.h | 13 ++++------- testpar/t_dset.c | 67 ++++++++++++++++++++++++++++------------------------- testpar/testphdf5.c | 3 --- testpar/testphdf5.h | 12 ++++------ 5 files changed, 46 insertions(+), 51 deletions(-) diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index 5f8a586..448e92d 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -413,7 +413,7 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, co * collective I/O */ if (MPI_SUCCESS != - (mpi_code = MPI_Allreduce(&local_cause, &global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm))) + (mpi_code = MPI_Allreduce(local_cause, global_cause, 2, MPI_UNSIGNED, MPI_BOR, io_info->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) } /* end else */ diff --git a/src/H5Eprivate.h b/src/H5Eprivate.h index c3c440f..4cd8b70 100644 --- a/src/H5Eprivate.h +++ b/src/H5Eprivate.h @@ -168,20 +168,15 @@ typedef struct H5E_t H5E_t; extern char H5E_mpi_error_str[MPI_MAX_ERROR_STRING]; extern int H5E_mpi_error_str_len; -#define HMPI_ERROR(mpierr) \ - { \ - MPI_Error_string(mpierr, H5E_mpi_error_str, &H5E_mpi_error_str_len); \ - HERROR(H5E_INTERNAL, H5E_MPIERRSTR, "%s", H5E_mpi_error_str); \ - } #define HMPI_DONE_ERROR(retcode, str, mpierr) \ { \ - HMPI_ERROR(mpierr); \ - HDONE_ERROR(H5E_INTERNAL, H5E_MPI, retcode, str); \ + MPI_Error_string(mpierr, H5E_mpi_error_str, &H5E_mpi_error_str_len); \ + HDONE_ERROR(H5E_INTERNAL, H5E_MPI, retcode, "%s: MPI error string is '%s'", str, H5E_mpi_error_str); \ } #define HMPI_GOTO_ERROR(retcode, str, mpierr) \ { \ - HMPI_ERROR(mpierr); \ - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, retcode, str); \ + MPI_Error_string(mpierr, H5E_mpi_error_str, &H5E_mpi_error_str_len); \ + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, retcode, "%s: MPI error string is '%s'", str, H5E_mpi_error_str); \ } #endif /* H5_HAVE_PARALLEL */ diff --git a/testpar/t_dset.c b/testpar/t_dset.c index 3e4a304..bbd4b28 100644 --- a/testpar/t_dset.c +++ b/testpar/t_dset.c @@ -1627,9 +1627,6 @@ extend_writeInd(void) VRFY((mem_dataspace >= 0), ""); /* Try write to dataset2 beyond its current dim sizes. Should fail. */ - /* Temporary turn off auto error reporting */ - H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data); - H5Eset_auto2(H5E_DEFAULT, NULL, NULL); /* create a file dataspace independently */ file_dataspace = H5Dget_space(dataset2); @@ -1638,11 +1635,13 @@ extend_writeInd(void) VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); /* write data independently. Should fail. */ - ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, H5P_DEFAULT, data_array1); + H5E_BEGIN_TRY + { + ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, H5P_DEFAULT, data_array1); + } + H5E_END_TRY VRFY((ret < 0), "H5Dwrite failed as expected"); - /* restore auto error reporting */ - H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data); H5Sclose(file_dataspace); /* Extend dataset2 and try again. Should succeed. */ @@ -1911,20 +1910,19 @@ extend_readInd(void) VRFY((dataset2 >= 0), ""); /* Try extend dataset1 which is open RDONLY. Should fail. */ - /* first turn off auto error reporting */ - H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data); - H5Eset_auto2(H5E_DEFAULT, NULL, NULL); file_dataspace = H5Dget_space(dataset1); VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); ret = H5Sget_simple_extent_dims(file_dataspace, dims, NULL); VRFY((ret > 0), "H5Sget_simple_extent_dims succeeded"); dims[0]++; - ret = H5Dset_extent(dataset1, dims); + H5E_BEGIN_TRY + { + ret = H5Dset_extent(dataset1, dims); + } + H5E_END_TRY VRFY((ret < 0), "H5Dset_extent failed as expected"); - /* restore auto error reporting */ - H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data); H5Sclose(file_dataspace); /* Read dataset1 using BYROW pattern */ @@ -2209,9 +2207,6 @@ extend_writeAll(void) } /* Try write to dataset2 beyond its current dim sizes. Should fail. */ - /* Temporary turn off auto error reporting */ - H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data); - H5Eset_auto2(H5E_DEFAULT, NULL, NULL); /* create a file dataspace independently */ file_dataspace = H5Dget_space(dataset2); @@ -2220,11 +2215,13 @@ extend_writeAll(void) VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); /* write data independently. Should fail. */ - ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, xfer_plist, data_array1); + H5E_BEGIN_TRY + { + ret = H5Dwrite(dataset2, H5T_NATIVE_INT, mem_dataspace, file_dataspace, xfer_plist, data_array1); + } + H5E_END_TRY VRFY((ret < 0), "H5Dwrite failed as expected"); - /* restore auto error reporting */ - H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data); H5Sclose(file_dataspace); /* Extend dataset2 and try again. Should succeed. */ @@ -2331,20 +2328,19 @@ extend_readAll(void) VRFY((dataset2 >= 0), ""); /* Try extend dataset1 which is open RDONLY. Should fail. */ - /* first turn off auto error reporting */ - H5Eget_auto2(H5E_DEFAULT, &old_func, &old_client_data); - H5Eset_auto2(H5E_DEFAULT, NULL, NULL); file_dataspace = H5Dget_space(dataset1); VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); ret = H5Sget_simple_extent_dims(file_dataspace, dims, NULL); VRFY((ret > 0), "H5Sget_simple_extent_dims succeeded"); dims[0]++; - ret = H5Dset_extent(dataset1, dims); + H5E_BEGIN_TRY + { + ret = H5Dset_extent(dataset1, dims); + } + H5E_END_TRY VRFY((ret < 0), "H5Dset_extent failed as expected"); - /* restore auto error reporting */ - H5Eset_auto2(H5E_DEFAULT, old_func, old_client_data); H5Sclose(file_dataspace); /* Read dataset1 using BYROW pattern */ @@ -3321,14 +3317,23 @@ test_actual_io_mode(int selection_mode) /* Release some resources */ ret = H5Sclose(sid); + VRFY((ret >= 0), "H5Sclose succeeded"); ret = H5Pclose(fapl); + VRFY((ret >= 0), "H5Pclose succeeded"); ret = H5Pclose(dcpl); + VRFY((ret >= 0), "H5Pclose succeeded"); ret = H5Pclose(dxpl_write); + VRFY((ret >= 0), "H5Pclose succeeded"); ret = H5Pclose(dxpl_read); + VRFY((ret >= 0), "H5Pclose succeeded"); ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose succeeded"); ret = H5Sclose(mem_space); + VRFY((ret >= 0), "H5Sclose succeeded"); ret = H5Sclose(file_space); + VRFY((ret >= 0), "H5Sclose succeeded"); ret = H5Fclose(fid); + VRFY((ret >= 0), "H5Fclose succeeded"); HDfree(buffer); return; } @@ -3344,9 +3349,7 @@ void actual_io_mode_tests(void) { int mpi_size = -1; - int mpi_rank = -1; MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - MPI_Comm_size(MPI_COMM_WORLD, &mpi_rank); test_actual_io_mode(TEST_ACTUAL_IO_NO_COLLECTIVE); @@ -4112,13 +4115,15 @@ dataset_atomicity(void) if (MAINPROCESS) { fid = H5Fopen(filename, H5F_ACC_RDWR, H5P_DEFAULT); VRFY((fid >= 0), "H5Fopen succeeed"); - } - /* should fail */ - ret = H5Fset_mpi_atomicity(fid, TRUE); - VRFY((ret == FAIL), "H5Fset_mpi_atomicity failed"); + /* should fail */ + H5E_BEGIN_TRY + { + ret = H5Fset_mpi_atomicity(fid, TRUE); + } + H5E_END_TRY + VRFY((ret == FAIL), "H5Fset_mpi_atomicity failed"); - if (MAINPROCESS) { ret = H5Fclose(fid); VRFY((ret >= 0), "H5Fclose succeeded"); } diff --git a/testpar/testphdf5.c b/testpar/testphdf5.c index f5b9e63..e24e1e4 100644 --- a/testpar/testphdf5.c +++ b/testpar/testphdf5.c @@ -33,9 +33,6 @@ int ngroups = 512; /* number of groups to create in root int facc_type = FACC_MPIO; /*Test file access type */ int dxfer_coll_type = DXFER_COLLECTIVE_IO; -H5E_auto2_t old_func; /* previous error handler */ -void * old_client_data; /* previous error handler arg.*/ - /* other option flags */ /* FILENAME and filenames must have the same number of names. diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h index bd8de08..007fac0 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -225,13 +225,11 @@ typedef enum { } ShapeSameTestMethods; /* Shared global variables */ -extern int dim0, dim1; /*Dataset dimensions */ -extern int chunkdim0, chunkdim1; /*Chunk dimensions */ -extern int nerrors; /*errors count */ -extern H5E_auto2_t old_func; /* previous error handler */ -extern void * old_client_data; /*previous error handler arg.*/ -extern int facc_type; /*Test file access type */ -extern int dxfer_coll_type; +extern int dim0, dim1; /*Dataset dimensions */ +extern int chunkdim0, chunkdim1; /*Chunk dimensions */ +extern int nerrors; /*errors count */ +extern int facc_type; /*Test file access type */ +extern int dxfer_coll_type; /* Test program prototypes */ void test_plist_ed(void); -- cgit v0.12