From 7404b57da68e92bd28c5da2053830e7cbfe032d1 Mon Sep 17 00:00:00 2001 From: jhendersonHDF Date: Thu, 2 Nov 2023 21:42:28 -0500 Subject: Rework MPI Info FAPL preserve PR to use VFD 'ctl' operations (#3782) --- src/H5FDmpi.c | 48 +++++++++++++++++++++++++++++++++++---- src/H5FDmpio.c | 7 ++++++ src/H5FDprivate.h | 1 + src/H5FDpublic.h | 1 + src/H5FDsubfiling/H5FDsubfiling.c | 6 +++++ src/H5Fint.c | 25 +++++--------------- src/H5Fmpi.c | 34 +++++++++++++++++++++++---- src/H5Fpkg.h | 1 - src/H5Fprivate.h | 1 + 9 files changed, 95 insertions(+), 29 deletions(-) diff --git a/src/H5FDmpi.c b/src/H5FDmpi.c index 127740e..f247c34 100644 --- a/src/H5FDmpi.c +++ b/src/H5FDmpi.c @@ -104,13 +104,12 @@ done: } /* end H5FD_mpi_get_size() */ /*------------------------------------------------------------------------- - * Function: H5FD_mpi_get_comm + * Function: H5FD_mpi_get_comm * - * Purpose: Retrieves the file's communicator + * Purpose: Retrieves the file's MPI_Comm communicator object * - * Return: Success: The communicator (non-negative) - * - * Failure: Negative + * Return: Success: The communicator object + * Failure: MPI_COMM_NULL * *------------------------------------------------------------------------- */ @@ -144,6 +143,45 @@ done: } /* end H5FD_mpi_get_comm() */ /*------------------------------------------------------------------------- + * Function: H5FD_mpi_get_info + * + * Purpose: Retrieves the file's MPI_Info info object + * + * Return: Success: The info object + * Failure: MPI_INFO_NULL + * + *------------------------------------------------------------------------- + */ +MPI_Info +H5FD_mpi_get_info(H5FD_t *file) +{ + const H5FD_class_t *cls; + uint64_t flags = H5FD_CTL_FAIL_IF_UNKNOWN_FLAG | H5FD_CTL_ROUTE_TO_TERMINAL_VFD_FLAG; + MPI_Info info = MPI_INFO_NULL; + void *info_ptr = (void *)(&info); + MPI_Info ret_value; + + FUNC_ENTER_NOAPI(MPI_INFO_NULL) + + assert(file); + cls = (const H5FD_class_t *)(file->cls); + assert(cls); + assert(cls->ctl); /* All MPI drivers must implement this */ + + /* Dispatch to driver */ + if ((cls->ctl)(file, H5FD_CTL_GET_MPI_INFO_OPCODE, flags, NULL, &info_ptr) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed"); + + if (info == MPI_INFO_NULL) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed -- bad info object"); + + ret_value = info; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_mpi_get_info() */ + +/*------------------------------------------------------------------------- * Function: H5FD_mpi_MPIOff_to_haddr * * Purpose: Convert an MPI_Offset value to haddr_t. diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 83a5ad4..d5dd126 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -3795,6 +3795,7 @@ done: * At present, the supported op codes are: * * H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE + * H5FD_CTL_GET_MPI_INFO_OPCODE * H5FD_CTL_GET_MPI_RANK_OPCODE * H5FD_CTL_GET_MPI_SIZE_OPCODE * H5FD_CTL_GET_MPI_FILE_SYNC_OPCODE @@ -3827,6 +3828,12 @@ H5FD__mpio_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void H5_AT **((MPI_Comm **)output) = file->comm; break; + case H5FD_CTL_GET_MPI_INFO_OPCODE: + assert(output); + assert(*output); + **((MPI_Info **)output) = file->info; + break; + case H5FD_CTL_GET_MPI_RANK_OPCODE: assert(output); assert(*output); diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h index 5330077..2fe54a5 100644 --- a/src/H5FDprivate.h +++ b/src/H5FDprivate.h @@ -214,6 +214,7 @@ H5_DLL herr_t H5FD_get_mpio_atomicity(H5FD_t *file, bool *flag); H5_DLL int H5FD_mpi_get_rank(H5FD_t *file); H5_DLL int H5FD_mpi_get_size(H5FD_t *file); H5_DLL MPI_Comm H5FD_mpi_get_comm(H5FD_t *file); +H5_DLL MPI_Info H5FD_mpi_get_info(H5FD_t *file); H5_DLL herr_t H5FD_mpi_get_file_sync_required(H5FD_t *file, bool *file_sync_required); #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5FDpublic.h b/src/H5FDpublic.h index 5f40bff..d8d77d6 100644 --- a/src/H5FDpublic.h +++ b/src/H5FDpublic.h @@ -179,6 +179,7 @@ #define H5FD_CTL_INVALID_OPCODE 0 #define H5FD_CTL_TEST_OPCODE 1 #define H5FD_CTL_GET_MPI_COMMUNICATOR_OPCODE 2 +#define H5FD_CTL_GET_MPI_INFO_OPCODE 9 #define H5FD_CTL_GET_MPI_RANK_OPCODE 3 #define H5FD_CTL_GET_MPI_SIZE_OPCODE 4 #define H5FD_CTL_MEM_ALLOC 5 diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c index a2daba0..461fa16 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.c +++ b/src/H5FDsubfiling/H5FDsubfiling.c @@ -2551,6 +2551,12 @@ H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void **((MPI_Comm **)output) = file->ext_comm; break; + case H5FD_CTL_GET_MPI_INFO_OPCODE: + assert(output); + assert(*output); + **((MPI_Info **)output) = file->info; + break; + case H5FD_CTL_GET_MPI_RANK_OPCODE: assert(output); assert(*output); diff --git a/src/H5Fint.c b/src/H5Fint.c index 439fa4f..8738026 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -402,6 +402,7 @@ H5F_get_access_plist(H5F_t *f, bool app_ref) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag"); if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { MPI_Comm mpi_comm; + MPI_Info mpi_info; /* Retrieve and set MPI communicator */ if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f))) @@ -409,9 +410,11 @@ H5F_get_access_plist(H5F_t *f, bool app_ref) if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator"); - /* Retrieve MPI info object */ - if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object"); + /* Retrieve and set MPI info */ + if (MPI_INFO_NULL == (mpi_info = H5F_mpi_get_info(f))) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info"); + if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info"); } #endif /* H5_HAVE_PARALLEL */ if (H5P_set(new_plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) < @@ -1130,12 +1133,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F /* initialize point of no return */ f->shared->point_of_no_return = false; -#ifdef H5_HAVE_PARALLEL - /* Initialize this just in case we fail before setting this field and */ - /* we try to call H5_mpi_info_free() on uninitialized memory in H5F__dest() */ - f->shared->mpi_info = MPI_INFO_NULL; -#endif /* H5_HAVE_PARALLEL */ - /* Copy the file creation and file access property lists into the * new file handle. We do this early because some values might need * to change as the file is being opened. @@ -1212,8 +1209,6 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag"); if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag"); - if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object"); #endif /* H5_HAVE_PARALLEL */ if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) < 0) @@ -1419,14 +1414,6 @@ H5F__dest(H5F_t *f, bool flush, bool free_on_failure) f->shared->efc = NULL; } /* end if */ -#ifdef H5_HAVE_PARALLEL - if (f->shared->mpi_info != MPI_INFO_NULL) { - /* Free MPI info saved in the file struct */ - if (H5_mpi_info_free(&f->shared->mpi_info) < 0) - HDONE_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "can't free MPI info"); - } -#endif - /* With the shutdown modifications, the contents of the metadata cache * should be clean at this point, with the possible exception of the * the superblock and superblock extension. diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index 8a8fdc1..7a535e9 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -97,11 +97,10 @@ done: /*------------------------------------------------------------------------- * Function: H5F_mpi_get_comm * - * Purpose: Retrieves the file's communicator + * Purpose: Retrieves the file's MPI_Comm communicator object * - * Return: Success: The communicator (non-negative) - * - * Failure: Negative + * Return: Success: The communicator object + * Failure: MPI_COMM_NULL * *------------------------------------------------------------------------- */ @@ -123,6 +122,33 @@ done: } /* end H5F_mpi_get_comm() */ /*------------------------------------------------------------------------- + * Function: H5F_mpi_get_info + * + * Purpose: Retrieves the file's MPI_Info info object + * + * Return: Success: The info object + * Failure: MPI_INFO_NULL + * + *------------------------------------------------------------------------- + */ +MPI_Info +H5F_mpi_get_info(const H5F_t *f) +{ + MPI_Info ret_value = MPI_INFO_NULL; + + FUNC_ENTER_NOAPI(MPI_INFO_NULL) + + assert(f && f->shared); + + /* Dispatch to driver */ + if ((ret_value = H5FD_mpi_get_info(f->shared->lf)) == MPI_INFO_NULL) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, MPI_INFO_NULL, "driver get_info request failed"); + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_mpi_get_info() */ + +/*------------------------------------------------------------------------- * Function: H5F_shared_mpi_get_size * * Purpose: Retrieves the size of an MPI process. diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index e81b250..bc5c90b 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -359,7 +359,6 @@ struct H5F_shared_t { #ifdef H5_HAVE_PARALLEL H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */ bool coll_md_write; /* Do all metadata writes collectively */ - MPI_Info mpi_info; /* MPI info */ #endif /* H5_HAVE_PARALLEL */ }; diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index 9adbf3a..682e938 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -640,6 +640,7 @@ H5_DLL herr_t H5F_eoa_dirty(H5F_t *f); #ifdef H5_HAVE_PARALLEL H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); +H5_DLL MPI_Info H5F_mpi_get_info(const H5F_t *f); H5_DLL int H5F_shared_mpi_get_size(const H5F_shared_t *f_sh); H5_DLL int H5F_mpi_get_size(const H5F_t *f); H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm); -- cgit v0.12