diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-02-03 22:19:38 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-02-03 22:19:38 (GMT) |
commit | 1c9f219463e236487a27cee6cb839379d670cda4 (patch) | |
tree | cb696ac5138940d8c22a376654a80c67b9266ba0 /testpar | |
parent | 331cf6926d5f07a1edb75674299f9787d79d1d68 (diff) | |
download | hdf5-1c9f219463e236487a27cee6cb839379d670cda4.zip hdf5-1c9f219463e236487a27cee6cb839379d670cda4.tar.gz hdf5-1c9f219463e236487a27cee6cb839379d670cda4.tar.bz2 |
Unify handling of collective metadata reads status (#1206) (#1417)
Diffstat (limited to 'testpar')
-rw-r--r-- | testpar/t_cache.c | 40 | ||||
-rw-r--r-- | testpar/t_coll_md_read.c | 93 |
2 files changed, 77 insertions, 56 deletions
diff --git a/testpar/t_cache.c b/testpar/t_cache.c index 263b467..39c5721 100644 --- a/testpar/t_cache.c +++ b/testpar/t_cache.c @@ -6617,13 +6617,15 @@ trace_file_check(int metadata_write_strategy) static hbool_t smoke_check_6(int metadata_write_strategy) { - hbool_t success = TRUE; - int i; - int max_nerrors; - hid_t fid = -1; - H5F_t * file_ptr = NULL; - H5C_t * cache_ptr = NULL; - struct mssg_t mssg; + H5P_coll_md_read_flag_t md_reads_file_flag; + hbool_t md_reads_context_flag; + hbool_t success = TRUE; + int i; + int max_nerrors; + hid_t fid = -1; + H5F_t * file_ptr = NULL; + H5C_t * cache_ptr = NULL; + struct mssg_t mssg; switch (metadata_write_strategy) { @@ -6679,7 +6681,9 @@ smoke_check_6(int metadata_write_strategy) virt_num_data_entries = NUM_DATA_ENTRIES; /* insert the first half collectively */ - H5CX_set_coll_metadata_read(TRUE); + md_reads_file_flag = H5P_USER_TRUE; + md_reads_context_flag = TRUE; + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); for (i = 0; i < virt_num_data_entries / 2; i++) { struct datum *entry_ptr; entry_ptr = &(data[i]); @@ -6698,9 +6702,13 @@ smoke_check_6(int metadata_write_strategy) H5_CHECK_OVERFLOW(cache_ptr->max_cache_size, size_t, double); HDassert((double)cache_ptr->max_cache_size * 0.8 > cache_ptr->coll_list_size); } + /* Restore collective metadata reads state */ + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); /* insert the other half independently */ - H5CX_set_coll_metadata_read(FALSE); + md_reads_file_flag = H5P_USER_FALSE; + md_reads_context_flag = FALSE; + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); for (i = virt_num_data_entries / 2; i < virt_num_data_entries; i++) { struct datum *entry_ptr; entry_ptr = &(data[i]); @@ -6718,6 +6726,8 @@ smoke_check_6(int metadata_write_strategy) /* Make sure coll entries do not cross the 80% threshold */ HDassert((double)cache_ptr->max_cache_size * 0.8 > cache_ptr->coll_list_size); } + /* Restore collective metadata reads state */ + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); /* flush the file */ if (H5Fflush(fid, H5F_SCOPE_GLOBAL) < 0) { @@ -6728,7 +6738,9 @@ smoke_check_6(int metadata_write_strategy) } /* Protect the first half of the entries collectively */ - H5CX_set_coll_metadata_read(TRUE); + md_reads_file_flag = H5P_USER_TRUE; + md_reads_context_flag = TRUE; + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); for (i = 0; i < (virt_num_data_entries / 2); i++) { struct datum *entry_ptr; entry_ptr = &(data[i]); @@ -6746,9 +6758,13 @@ smoke_check_6(int metadata_write_strategy) /* Make sure coll entries do not cross the 80% threshold */ HDassert((double)cache_ptr->max_cache_size * 0.8 > cache_ptr->coll_list_size); } + /* Restore collective metadata reads state */ + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); /* protect the other half independently */ - H5CX_set_coll_metadata_read(FALSE); + md_reads_file_flag = H5P_USER_FALSE; + md_reads_context_flag = FALSE; + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); for (i = virt_num_data_entries / 2; i < virt_num_data_entries; i++) { struct datum *entry_ptr; entry_ptr = &(data[i]); @@ -6766,6 +6782,8 @@ smoke_check_6(int metadata_write_strategy) /* Make sure coll entries do not cross the 80% threshold */ HDassert((double)cache_ptr->max_cache_size * 0.8 > cache_ptr->coll_list_size); } + /* Restore collective metadata reads state */ + H5F_set_coll_metadata_reads(file_ptr, &md_reads_file_flag, &md_reads_context_flag); for (i = 0; i < (virt_num_data_entries); i++) { unlock_entry(file_ptr, i, H5AC__NO_FLAGS_SET); diff --git a/testpar/t_coll_md_read.c b/testpar/t_coll_md_read.c index fd62eb6..cabdea0 100644 --- a/testpar/t_coll_md_read.c +++ b/testpar/t_coll_md_read.c @@ -34,10 +34,9 @@ #define MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS 2 -#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue" -#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE 20000 -#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE 1 -#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1 +#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_COLL_THRESH_NUM 10000 +#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue" +#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1 /* * A test for issue HDFFV-10501. A parallel hang was reported which occurred @@ -339,21 +338,34 @@ test_multi_chunk_io_addrmap_issue(void) * collective metadata reads being made only by process 0 in H5D__sort_chunk(). * * NOTE: Due to the way that the threshold value which pertains to this test - * is currently calculated within HDF5, there are several conditions that this - * test must maintain. Refer to the function H5D__sort_chunk in H5Dmpio.c for - * a better idea of why. + * is currently calculated within HDF5, the following two conditions must be + * true to trigger the issue: * - * Condition 1: We need to make sure that the test always selects every single - * chunk in the dataset. It is fine if the selection is split up among multiple - * ranks, but their combined selection must cover the whole dataset. + * Condition 1: A certain threshold ratio must be met in order to have HDF5 + * obtain all chunk addresses collectively inside H5D__sort_chunk(). This is + * given by the following: * - * Condition 2: The number of chunks in the dataset divided by the number of MPI - * ranks must exceed or equal 10000. In other words, each MPI rank must be - * responsible for 10000 or more unique chunks. + * (sum_chunk * 100) / (dataset_nchunks * mpi_size) >= 30% * - * Condition 3: This test will currently only be reliably reproducable for 2 or 3 - * MPI ranks. The threshold value calculated reduces to a constant 100 / mpi_size, - * and is compared against a default value of 30%. + * where: + * * `sum_chunk` is the combined sum of the number of chunks selected in + * the dataset by all ranks (chunks selected by more than one rank count + * individually toward the sum for each rank selecting that chunk) + * * `dataset_nchunks` is the number of chunks in the dataset (selected + * or not) + * * `mpi_size` is the size of the MPI Communicator + * + * Condition 2: `sum_chunk` divided by `mpi_size` must exceed or equal a certain + * threshold (as of this writing, 10000). + * + * To satisfy both these conditions, we #define a macro, + * LINK_CHUNK_IO_SORT_CHUNK_ISSUE_COLL_THRESH_NUM, which corresponds to the + * value of the H5D_ALL_CHUNK_ADDR_THRES_COL_NUM macro in H5Dmpio.c (the + * 10000 threshold from condition 2). We then create a dataset of that many + * chunks and have each MPI rank write to and read from a piece of every single + * chunk in the dataset. This ensures chunk utilization is the max possible + * and exceeds our 30% target ratio, while always exactly matching the numeric + * chunk threshold value of condition 2. * * Failure in this test may either cause a hang, or, due to how the MPI calls * pertaining to this issue might mistakenly match up, may cause an MPI error @@ -375,10 +387,9 @@ void test_link_chunk_io_sort_chunk_issue(void) { const char *filename; - hsize_t * dataset_dims = NULL; - hsize_t max_dataset_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; - hsize_t sel_dims[1]; - hsize_t chunk_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS] = {LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS}; + hsize_t dataset_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; + hsize_t sel_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; + hsize_t chunk_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; hsize_t start[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; hsize_t stride[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; hsize_t count[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS]; @@ -412,14 +423,13 @@ test_link_chunk_io_sort_chunk_issue(void) file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); VRFY((file_id >= 0), "H5Fcreate succeeded"); - dataset_dims = HDmalloc(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS * sizeof(*dataset_dims)); - VRFY((dataset_dims != NULL), "malloc succeeded"); - - dataset_dims[0] = (hsize_t)LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE * (hsize_t)mpi_size * - (hsize_t)LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE; - max_dataset_dims[0] = H5S_UNLIMITED; + /* + * Create a one-dimensional dataset of exactly LINK_CHUNK_IO_SORT_CHUNK_ISSUE_COLL_THRESH_NUM + * chunks, where every rank writes to a piece of every single chunk to keep utilization high. + */ + dataset_dims[0] = (hsize_t)mpi_size * (hsize_t)LINK_CHUNK_IO_SORT_CHUNK_ISSUE_COLL_THRESH_NUM; - fspace_id = H5Screate_simple(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, dataset_dims, max_dataset_dims); + fspace_id = H5Screate_simple(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, dataset_dims, NULL); VRFY((fspace_id >= 0), "H5Screate_simple succeeded"); /* @@ -428,6 +438,9 @@ test_link_chunk_io_sort_chunk_issue(void) dcpl_id = H5Pcreate(H5P_DATASET_CREATE); VRFY((dcpl_id >= 0), "H5Pcreate succeeded"); + /* Chunk size is equal to MPI size since each rank writes to a piece of every chunk */ + chunk_dims[0] = (hsize_t)mpi_size; + VRFY((H5Pset_chunk(dcpl_id, LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded"); @@ -437,23 +450,21 @@ test_link_chunk_io_sort_chunk_issue(void) /* * Setup hyperslab selection to split the dataset among the ranks. - * - * The ranks will write rows across the dataset. */ - stride[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE; - count[0] = (dataset_dims[0] / LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) / (hsize_t)mpi_size; - start[0] = count[0] * (hsize_t)mpi_rank; - block[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE; + start[0] = (hsize_t)mpi_rank; + stride[0] = (hsize_t)mpi_size; + count[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_COLL_THRESH_NUM; + block[0] = 1; VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded"); - sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE); + sel_dims[0] = count[0]; mspace_id = H5Screate_simple(1, sel_dims, NULL); VRFY((mspace_id >= 0), "H5Screate_simple succeeded"); - data = HDcalloc(1, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int)); + data = HDcalloc(1, count[0] * sizeof(int)); VRFY((data != NULL), "calloc succeeded"); dxpl_id = H5Pcreate(H5P_DATASET_XFER); @@ -476,33 +487,25 @@ test_link_chunk_io_sort_chunk_issue(void) VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded"); - read_buf = HDmalloc(count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int)); + read_buf = HDmalloc(count[0] * sizeof(int)); VRFY((read_buf != NULL), "malloc succeeded"); VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded"); - sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE); + sel_dims[0] = count[0]; VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded"); mspace_id = H5Screate_simple(1, sel_dims, NULL); VRFY((mspace_id >= 0), "H5Screate_simple succeeded"); - read_buf = HDrealloc(read_buf, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int)); - VRFY((read_buf != NULL), "realloc succeeded"); - /* * Finally have each rank read their section of data back from the dataset. */ VRFY((H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, read_buf) >= 0), "H5Dread succeeded"); - if (dataset_dims) { - HDfree(dataset_dims); - dataset_dims = NULL; - } - if (data) { HDfree(data); data = NULL; |