From feda52b08488dbb3e5b2450f6cd63baa98b0e57e Mon Sep 17 00:00:00 2001 From: Neil Fortner Date: Fri, 18 Feb 2022 09:27:05 -0600 Subject: Implement support for big I/O for independent reads (#1446) * Implement big I/O support for independent reads. Add test for this. * Committing clang-format changes * Update big I/O code in H5FD__mpio_write() to match that in H5FD__mpio_read(). Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- src/H5FDmpio.c | 50 ++++++++++++++++++++++++++++++++++---------------- testpar/t_bigio.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 64 insertions(+), 23 deletions(-) diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 1feff43..1969899 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -1172,6 +1172,7 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU int n; #endif hbool_t use_view_this_time = FALSE; + hbool_t derived_type = FALSE; hbool_t rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */ #ifdef H5FDmpio_DEBUG hbool_t H5FD_mpio_debug_t_flag = (H5FD_mpio_debug_flags_s[(int)'t'] && H5FD_MPIO_TRACE_THIS_RANK(file)); @@ -1199,8 +1200,6 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off /*out*/) < 0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") size_i = (int)size; - if ((hsize_t)size_i != size) - HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i") /* Only look for MPI views for raw data transfers */ if (type == H5FD_MEM_DRAW) { @@ -1304,6 +1303,21 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) } /* end if */ else { + if (size != (hsize_t)size_i) { + /* If HERE, then we need to work around the integer size limit + * of 2GB. The input size_t size variable cannot fit into an integer, + * but we can get around that limitation by creating a different datatype + * and then setting the integer size (or element count) to 1 when using + * the derived_type. + */ + + if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype") + + derived_type = TRUE; + size_i = 1; + } + #ifdef H5FDmpio_DEBUG if (H5FD_mpio_debug_r_flag) HDfprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); @@ -1377,6 +1391,9 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU HDmemset((char *)buf + bytes_read, 0, (size_t)n); done: + if (derived_type) + MPI_Type_free(&buf_type); + #ifdef H5FDmpio_DEBUG if (H5FD_mpio_debug_t_flag) HDfprintf(stderr, "%s: (%d) Leaving\n", __func__, file->mpi_rank); @@ -1489,20 +1506,6 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, h */ mpi_off = 0; } /* end if */ - else if (size != (hsize_t)size_i) { - /* If HERE, then we need to work around the integer size limit - * of 2GB. The input size_t size variable cannot fit into an integer, - * but we can get around that limitation by creating a different datatype - * and then setting the integer size (or element count) to 1 when using - * the derived_type. - */ - - if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0) - HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype") - - derived_type = TRUE; - size_i = 1; - } /* Write the data. */ if (use_view_this_time) { @@ -1548,6 +1551,21 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, h HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) } /* end if */ else { + if (size != (hsize_t)size_i) { + /* If HERE, then we need to work around the integer size limit + * of 2GB. The input size_t size variable cannot fit into an integer, + * but we can get around that limitation by creating a different datatype + * and then setting the integer size (or element count) to 1 when using + * the derived_type. + */ + + if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype") + + derived_type = TRUE; + size_i = 1; + } + #ifdef H5FDmpio_DEBUG if (H5FD_mpio_debug_w_flag) HDfprintf(stderr, "%s: (%d) doing MPI independent IO\n", __func__, file->mpi_rank); diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c index 4df624b..0a971c5 100644 --- a/testpar/t_bigio.c +++ b/testpar/t_bigio.c @@ -1107,13 +1107,15 @@ single_rank_independent_io(void) HDprintf("\nSingle Rank Independent I/O\n"); if (MAIN_PROCESS) { - hsize_t dims[] = {LARGE_DIM}; - hid_t file_id = -1; - hid_t fapl_id = -1; - hid_t dset_id = -1; - hid_t fspace_id = -1; - hid_t mspace_id = -1; - void * data = NULL; + hsize_t dims[] = {LARGE_DIM}; + hid_t file_id = -1; + hid_t fapl_id = -1; + hid_t dset_id = -1; + hid_t fspace_id = -1; + hid_t mspace_id = -1; + herr_t ret; + int * data = NULL; + uint64_t i; fapl_id = H5Pcreate(H5P_FILE_ACCESS); VRFY_G((fapl_id >= 0), "H5P_FILE_ACCESS"); @@ -1135,6 +1137,10 @@ single_rank_independent_io(void) data = malloc(LARGE_DIM * sizeof(int)); + /* Initialize data */ + for (i = 0; i < LARGE_DIM; i++) + data[i] = (int)(i % (uint64_t)DXFER_BIGCOUNT); + if (mpi_rank_g == 0) H5Sselect_all(fspace_id); else @@ -1143,7 +1149,24 @@ single_rank_independent_io(void) dims[0] = LARGE_DIM; mspace_id = H5Screate_simple(1, dims, NULL); VRFY_G((mspace_id >= 0), "H5Screate_simple mspace_id succeeded"); + + /* Write data */ H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, H5P_DEFAULT, data); + VRFY_G((ret >= 0), "H5Dwrite succeeded"); + + /* Wipe buffer */ + HDmemset(data, 0, LARGE_DIM * sizeof(int)); + + /* Read data back */ + H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, H5P_DEFAULT, data); + VRFY_G((ret >= 0), "H5Dread succeeded"); + + /* Verify data */ + for (i = 0; i < LARGE_DIM; i++) + if (data[i] != (int)(i % (uint64_t)DXFER_BIGCOUNT)) { + HDfprintf(stderr, "verify failed\n"); + exit(1); + } free(data); H5Sclose(mspace_id); -- cgit v0.12