From 077898d73844c2be90ead9241bc05402befbf2f1 Mon Sep 17 00:00:00 2001 From: jhendersonHDF Date: Fri, 25 Aug 2023 08:10:23 -0500 Subject: Revise file close assertion failure fix (#3418) --- src/H5C.c | 2 +- src/H5Cimage.c | 5 +++++ src/H5Fint.c | 10 ++++----- src/H5Fsuper.c | 20 ++++++++++++++--- src/H5MF.c | 68 ++++++++++++++++++++++++++++++---------------------------- 5 files changed, 63 insertions(+), 42 deletions(-) diff --git a/src/H5C.c b/src/H5C.c index 24dd800..545154c 100644 --- a/src/H5C.c +++ b/src/H5C.c @@ -416,7 +416,7 @@ H5C_prep_for_file_close(H5F_t *f) #ifdef H5_HAVE_PARALLEL if ((H5F_INTENT(f) & H5F_ACC_RDWR) && !image_generated && cache_ptr->aux_ptr != NULL && - f->shared->sblock && f->shared->fs_persist) { + f->shared->fs_persist) { /* If persistent free space managers are enabled, flushing the * metadata cache may result in the deletion, insertion, and/or * dirtying of entries. diff --git a/src/H5Cimage.c b/src/H5Cimage.c index c99c612..4fa3a1c 100644 --- a/src/H5Cimage.c +++ b/src/H5Cimage.c @@ -654,6 +654,11 @@ H5C__load_cache_image(H5F_t *f) } /* end if */ done: + if (ret_value < 0) { + if (H5_addr_defined(cache_ptr->image_addr)) + cache_ptr->image_buffer = H5MM_xfree(cache_ptr->image_buffer); + } + FUNC_LEAVE_NOAPI(ret_value) } /* H5C__load_cache_image() */ diff --git a/src/H5Fint.c b/src/H5Fint.c index 1b5b75b..8f80e97 100644 --- a/src/H5Fint.c +++ b/src/H5Fint.c @@ -80,7 +80,7 @@ static herr_t H5F__build_name(const char *prefix, const char *file_name, char ** static char *H5F__getenv_prefix_name(char **env_prefix /*in,out*/); static H5F_t *H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf); static herr_t H5F__check_if_using_file_locks(H5P_genplist_t *fapl, hbool_t *use_file_locking); -static herr_t H5F__dest(H5F_t *f, hbool_t flush); +static herr_t H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure); static herr_t H5F__build_actual_name(const H5F_t *f, const H5P_genplist_t *fapl, const char *name, char ** /*out*/ actual_name); static herr_t H5F__flush_phase1(H5F_t *f); @@ -1357,7 +1357,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5F__dest(H5F_t *f, hbool_t flush) +H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure) { herr_t ret_value = SUCCEED; /* Return value */ @@ -1624,7 +1624,7 @@ H5F__dest(H5F_t *f, hbool_t flush) HDONE_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "problems closing file"); f->shared = NULL; - if (ret_value >= 0) + if ((ret_value >= 0) || free_on_failure) f = H5FL_FREE(H5F_t, f); FUNC_LEAVE_NOAPI(ret_value) @@ -2121,7 +2121,7 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id) done: if ((NULL == ret_value) && file) - if (H5F__dest(file, FALSE) < 0) + if (H5F__dest(file, FALSE, TRUE) < 0) HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "problems closing file"); FUNC_LEAVE_NOAPI(ret_value) @@ -2556,7 +2556,7 @@ H5F_try_close(H5F_t *f, hbool_t *was_closed /*out*/) * shared H5F_shared_t struct. If the reference count for the H5F_shared_t * struct reaches zero then destroy it also. */ - if (H5F__dest(f, TRUE) < 0) + if (H5F__dest(f, TRUE, FALSE) < 0) HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "problems closing file"); /* Since we closed the file, this should be set to TRUE */ diff --git a/src/H5Fsuper.c b/src/H5Fsuper.c index cd20191..3160eb3 100644 --- a/src/H5Fsuper.c +++ b/src/H5Fsuper.c @@ -1076,8 +1076,9 @@ H5F__super_init(H5F_t *f) FALSE; /* Whether the driver info block has been inserted into the metadata cache */ H5P_genplist_t *plist; /* File creation property list */ H5AC_ring_t orig_ring = H5AC_RING_INV; - hsize_t userblock_size; /* Size of userblock, in bytes */ - hsize_t superblock_size; /* Size of superblock, in bytes */ + hsize_t userblock_size; /* Size of userblock, in bytes */ + hsize_t superblock_size = 0; /* Size of superblock, in bytes */ + haddr_t superblock_addr = HADDR_UNDEF; size_t driver_size; /* Size of driver info block (bytes) */ unsigned super_vers = HDF5_SUPERBLOCK_VERSION_DEF; /* Superblock version for file */ H5O_loc_t ext_loc; /* Superblock extension object location */ @@ -1277,7 +1278,7 @@ H5F__super_init(H5F_t *f) f->shared->sblock = sblock; /* Allocate space for the superblock */ - if (HADDR_UNDEF == H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size)) + if (HADDR_UNDEF == (superblock_addr = H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for superblock"); /* set the drvinfo filed to NULL -- will overwrite this later if needed */ @@ -1469,6 +1470,19 @@ done: /* Check if the superblock has been allocated yet */ if (sblock) { + if (non_default_fs_settings && H5_addr_defined(superblock_addr)) { + /* + * For non-default free-space settings, the allocation of + * space in the file for the superblock may have have allocated + * memory for the free-space manager and inserted it into the + * metadata cache. Clean that up before returning or we may fail + * to close the file later due to the metadata cache's metadata + * free space manager ring (H5AC_RING_MDFSM) not being clean. + */ + if (H5MF_try_close(f) < 0) + HDONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't close file free space manager"); + } + /* Check if we've cached it already */ if (sblock_in_cache) { /* Unpin superblock in cache */ diff --git a/src/H5MF.c b/src/H5MF.c index 2c9fb6d..b527d6b 100644 --- a/src/H5MF.c +++ b/src/H5MF.c @@ -2581,16 +2581,14 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled) hbool_t fsm_opened[H5F_MEM_PAGE_NTYPES]; /* State of FSM */ hbool_t fsm_visited[H5F_MEM_PAGE_NTYPES]; /* State of FSM */ - /* Sanity check */ - assert(f->shared->sblock); - /* should only be called if file is opened R/W */ assert(H5F_INTENT(f) & H5F_ACC_RDWR); /* shouldn't be called unless we have a superblock supporting the * superblock extension. */ - assert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2); + if (f->shared->sblock) + assert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2); /* Initialize fsm_opened and fsm_visited */ memset(fsm_opened, 0, sizeof(fsm_opened)); @@ -2738,40 +2736,44 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled) * file space manager info message is guaranteed to exist. * Leave it in for now, but consider removing it. */ - if (H5_addr_defined(f->shared->sblock->ext_addr)) - if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL, - "error in removing message from superblock extension"); + if (f->shared->sblock) { + if (H5_addr_defined(f->shared->sblock->ext_addr)) + if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL, + "error in removing message from superblock extension"); + } /* As the final element in 1), shrink the EOA for the file */ if (H5MF__close_shrink_eoa(f) < 0) HGOTO_ERROR(H5E_RESOURCE, H5E_CANTSHRINK, FAIL, "can't shrink eoa"); - /* 2) Ensure that space is allocated for the free space manager superblock - * extension message. Must do this now, before reallocating file space - * for free space managers, as it is possible that this allocation may - * grab the last section in a FSM -- making it unnecessary to - * re-allocate file space for it. - * - * Do this by writing a free space manager superblock extension message. - * - * Since no free space manager has file space allocated for it, this - * message must be invalid since we can't save addresses of FSMs when - * those addresses are unknown. This is OK -- we will write the correct - * values to the message at free space manager shutdown. - */ - for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++) - fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF; - fsinfo.strategy = f->shared->fs_strategy; - fsinfo.persist = f->shared->fs_persist; - fsinfo.threshold = f->shared->fs_threshold; - fsinfo.page_size = f->shared->fs_page_size; - fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres; - fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF; - - if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0) - HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL, - "error in writing fsinfo message to superblock extension"); + if (f->shared->sblock) { + /* 2) Ensure that space is allocated for the free space manager superblock + * extension message. Must do this now, before reallocating file space + * for free space managers, as it is possible that this allocation may + * grab the last section in a FSM -- making it unnecessary to + * re-allocate file space for it. + * + * Do this by writing a free space manager superblock extension message. + * + * Since no free space manager has file space allocated for it, this + * message must be invalid since we can't save addresses of FSMs when + * those addresses are unknown. This is OK -- we will write the correct + * values to the message at free space manager shutdown. + */ + for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++) + fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF; + fsinfo.strategy = f->shared->fs_strategy; + fsinfo.persist = f->shared->fs_persist; + fsinfo.threshold = f->shared->fs_threshold; + fsinfo.page_size = f->shared->fs_page_size; + fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres; + fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF; + + if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0) + HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL, + "error in writing fsinfo message to superblock extension"); + } /* 3) Scan all free space managers not involved in allocating * space for free space managers. For each such free space -- cgit v0.12