diff options
Diffstat (limited to 'src/H5C.c')
-rw-r--r-- | src/H5C.c | 4152 |
1 files changed, 2032 insertions, 2120 deletions
@@ -41,7 +41,7 @@ * * Code Changes: * - * - Remove extra functionality in H5C_flush_single_entry()? + * - Remove extra functionality in H5C__flush_single_entry()? * * - Change protect/unprotect to lock/unlock. * @@ -70,60 +70,68 @@ * **************************************************************************/ +/****************/ +/* Module Setup */ +/****************/ + #define H5C_PACKAGE /*suppress error about including H5Cpkg */ #define H5F_PACKAGE /*suppress error about including H5Fpkg */ +/***********/ +/* Headers */ +/***********/ #include "H5private.h" /* Generic Functions */ #ifdef H5_HAVE_PARALLEL #include "H5ACprivate.h" /* Metadata cache */ #endif /* H5_HAVE_PARALLEL */ #include "H5Cpkg.h" /* Cache */ -#include "H5Dprivate.h" /* Dataset functions */ #include "H5Eprivate.h" /* Error handling */ #include "H5Fpkg.h" /* Files */ #include "H5FDprivate.h" /* File drivers */ #include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ +#include "H5MFprivate.h" /* File memory management */ #include "H5MMprivate.h" /* Memory management */ #include "H5Pprivate.h" /* Property lists */ #include "H5SLprivate.h" /* Skip lists */ -/* - * Private file-scope variables. - */ +/****************/ +/* Local Macros */ +/****************/ +#if H5C_DO_MEMORY_SANITY_CHECKS +#define H5C_IMAGE_EXTRA_SPACE 8 +#define H5C_IMAGE_SANITY_VALUE "DeadBeef" +#else /* H5C_DO_MEMORY_SANITY_CHECKS */ +#define H5C_IMAGE_EXTRA_SPACE 0 +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ -/* Declare a free list to manage the H5C_t struct */ -H5FL_DEFINE_STATIC(H5C_t); +/******************/ +/* Local Typedefs */ +/******************/ -/* - * Private file-scope function declarations: - */ +/********************/ +/* Local Prototypes */ +/********************/ static herr_t H5C__auto_adjust_cache_size(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr); + hid_t dxpl_id, + hbool_t write_permitted); static herr_t H5C__autoadjust__ageout(H5F_t * f, + hid_t dxpl_id, double hit_rate, enum H5C_resize_status * status_ptr, size_t * new_max_cache_size_ptr, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr); + hbool_t write_permitted); static herr_t H5C__autoadjust__ageout__cycle_epoch_marker(H5C_t * cache_ptr); static herr_t H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr); + hid_t dxpl_id, + hbool_t write_permitted); static herr_t H5C__autoadjust__ageout__insert_new_marker(H5C_t * cache_ptr); @@ -135,19 +143,9 @@ static herr_t H5C__flash_increase_cache_size(H5C_t * cache_ptr, size_t old_entry_size, size_t new_entry_size); -static herr_t H5C_flush_single_entry(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - const H5C_class_t * type_ptr, - haddr_t addr, - unsigned flags, - hbool_t * first_flush_ptr, - hbool_t del_entry_from_slist_on_destroy); - -static herr_t H5C_flush_invalidate_cache(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - unsigned flags); +static herr_t H5C_flush_invalidate_cache(const H5F_t * f, + hid_t dxpl_id, + unsigned flags); static void * H5C_load_entry(H5F_t * f, hid_t dxpl_id, @@ -156,19 +154,16 @@ static void * H5C_load_entry(H5F_t * f, void * udata); static herr_t H5C_make_space_in_cache(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - size_t space_needed, - hbool_t write_permitted, - hbool_t * first_flush_ptr); + hid_t dxpl_id, + size_t space_needed, + hbool_t write_permitted); static herr_t H5C_tag_entry(H5C_t * cache_ptr, H5C_cache_entry_t * entry_ptr, hid_t dxpl_id); static herr_t H5C_flush_tagged_entries(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, + hid_t dxpl_id, H5C_t * cache_ptr, haddr_t tag); @@ -176,20 +171,46 @@ static herr_t H5C_mark_tagged_entries(H5C_t * cache_ptr, haddr_t tag); static herr_t H5C_flush_marked_entries(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - H5C_t * cache_ptr); + hid_t dxpl_id); #if H5C_DO_TAGGING_SANITY_CHECKS static herr_t H5C_verify_tag(int id, haddr_t tag); #endif +#if H5C_DO_SLIST_SANITY_CHECKS +static hbool_t H5C_entry_in_skip_list(H5C_t * cache_ptr, + H5C_cache_entry_t *target_ptr); +#endif /* H5C_DO_SLIST_SANITY_CHECKS */ + #if H5C_DO_EXTREME_SANITY_CHECKS static herr_t H5C_validate_lru_list(H5C_t * cache_ptr); static herr_t H5C_validate_pinned_entry_list(H5C_t * cache_ptr); static herr_t H5C_validate_protected_entry_list(H5C_t * cache_ptr); #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ +#if 0 /* debugging routines */ +herr_t H5C_dump_cache(H5C_t * cache_ptr, const char * cache_name); +herr_t H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn); +#endif /* debugging routines */ + +/*********************/ +/* Package Variables */ +/*********************/ + + +/*****************************/ +/* Library Private Variables */ +/*****************************/ + + +/*******************/ +/* Local Variables */ +/*******************/ + +/* Declare a free list to manage the H5C_t struct */ +H5FL_DEFINE_STATIC(H5C_t); + + /**************************************************************************** * @@ -208,25 +229,53 @@ static herr_t H5C_validate_protected_entry_list(H5C_t * cache_ptr); #define H5C__EPOCH_MARKER_TYPE H5C__MAX_NUM_TYPE_IDS -static void *H5C_epoch_marker_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, - void *udata); -static herr_t H5C_epoch_marker_flush(H5F_t *f, hid_t dxpl_id, hbool_t dest, - haddr_t addr, void *thing, - unsigned *flags_ptr); -static herr_t H5C_epoch_marker_dest(H5F_t *f, void *thing); -static herr_t H5C_epoch_marker_clear(H5F_t *f, void *thing, hbool_t dest); -static herr_t H5C_epoch_marker_notify(H5C_notify_action_t action, void *thing); -static herr_t H5C_epoch_marker_size(const H5F_t *f, const void *thing, size_t *size_ptr); +static herr_t H5C__epoch_marker_get_load_size(const void *udata_ptr, + size_t *image_len_ptr); +static void * H5C__epoch_marker_deserialize(const void * image_ptr, + size_t len, + void * udata, + hbool_t * dirty_ptr); +static herr_t H5C__epoch_marker_image_len(const void * thing, + size_t *image_len_ptr, + hbool_t *compressed_ptr, + size_t *compressed_len_ptr); +static herr_t H5C__epoch_marker_pre_serialize(const H5F_t *f, + hid_t dxpl_id, + void * thing, + haddr_t addr, + size_t len, + size_t compressed_len, + haddr_t * new_addr_ptr, + size_t * new_len_ptr, + size_t * new_compressed_len_ptr, + unsigned * flags_ptr); +static herr_t H5C__epoch_marker_serialize(const H5F_t *f, + void * image_ptr, + size_t len, + void * thing); +static herr_t H5C__epoch_marker_notify(H5C_notify_action_t action, void *thing); +static herr_t H5C__epoch_marker_free_icr(void * thing); + +static herr_t H5C__epoch_marker_clear(const H5F_t *f, void * thing, + hbool_t about_to_destroy); +static herr_t H5C__epoch_marker_fsf_size(const void H5_ATTR_UNUSED * thing, + size_t H5_ATTR_UNUSED * fsf_size_ptr); const H5C_class_t epoch_marker_class = { - /* id = */ H5C__EPOCH_MARKER_TYPE, - /* load = */ &H5C_epoch_marker_load, - /* flush = */ &H5C_epoch_marker_flush, - /* dest = */ &H5C_epoch_marker_dest, - /* clear = */ &H5C_epoch_marker_clear, - /* notify = */&H5C_epoch_marker_notify, - /* size = */ &H5C_epoch_marker_size + /* id = */ H5C__EPOCH_MARKER_TYPE, + /* name = */ "epoch marker", + /* mem_type = */ H5FD_MEM_DEFAULT, /* value doesn't matter */ + /* flags = */ H5AC__CLASS_NO_FLAGS_SET, + /* get_load_size = */ H5C__epoch_marker_get_load_size, + /* deserialize = */ H5C__epoch_marker_deserialize, + /* image_len = */ H5C__epoch_marker_image_len, + /* pre_serialize = */ H5C__epoch_marker_pre_serialize, + /* serialize = */ H5C__epoch_marker_serialize, + /* notify = */ H5C__epoch_marker_notify, + /* free_icr = */ H5C__epoch_marker_free_icr, + /* clear = */ H5C__epoch_marker_clear, + /* fsf_size = */ H5C__epoch_marker_fsf_size, }; @@ -239,848 +288,114 @@ const H5C_class_t epoch_marker_class = * JRM - 11/16/04 * ***************************************************************************/ - -static void * -H5C_epoch_marker_load(H5F_t H5_ATTR_UNUSED * f, - hid_t H5_ATTR_UNUSED dxpl_id, - haddr_t H5_ATTR_UNUSED addr, - void H5_ATTR_UNUSED * udata) +static herr_t +H5C__epoch_marker_get_load_size(const void H5_ATTR_UNUSED *udata_ptr, + size_t H5_ATTR_UNUSED *image_len_ptr) { - void * ret_value = NULL; /* Return value */ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_ENTER_NOAPI_NOINIT + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, NULL, "called unreachable fcn.") + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_get_load_size() */ -done: + +static void * +H5C__epoch_marker_deserialize(const void H5_ATTR_UNUSED * image_ptr, size_t H5_ATTR_UNUSED len, + void H5_ATTR_UNUSED * udata, hbool_t H5_ATTR_UNUSED * dirty_ptr) +{ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_LEAVE_NOAPI(ret_value) -} + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); + FUNC_LEAVE_NOAPI(NULL) +} /* end H5C__epoch_marker_deserialize() */ static herr_t -H5C_epoch_marker_flush(H5F_t H5_ATTR_UNUSED *f, - hid_t H5_ATTR_UNUSED dxpl_id, - hbool_t H5_ATTR_UNUSED dest, - haddr_t H5_ATTR_UNUSED addr, - void H5_ATTR_UNUSED *thing, - unsigned H5_ATTR_UNUSED * flags_ptr) +H5C__epoch_marker_image_len(const void H5_ATTR_UNUSED *thing, + size_t H5_ATTR_UNUSED *image_len_ptr, hbool_t H5_ATTR_UNUSED *compressed_ptr, + size_t H5_ATTR_UNUSED *compressed_len_ptr) { - herr_t ret_value = FAIL; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "called unreachable fcn.") + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ -done: - - FUNC_LEAVE_NOAPI(ret_value) -} + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_image_len() */ static herr_t -H5C_epoch_marker_dest(H5F_t H5_ATTR_UNUSED * f, - void H5_ATTR_UNUSED * thing) +H5C__epoch_marker_pre_serialize(const H5F_t H5_ATTR_UNUSED *f, hid_t H5_ATTR_UNUSED dxpl_id, + void H5_ATTR_UNUSED *thing, haddr_t H5_ATTR_UNUSED addr, size_t H5_ATTR_UNUSED len, + size_t H5_ATTR_UNUSED compressed_len, haddr_t H5_ATTR_UNUSED *new_addr_ptr, + size_t H5_ATTR_UNUSED *new_len_ptr, size_t H5_ATTR_UNUSED *new_compressed_len_ptr, + unsigned H5_ATTR_UNUSED *flags_ptr) { - herr_t ret_value = FAIL; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "called unreachable fcn.") + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ -done: + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - FUNC_LEAVE_NOAPI(ret_value) -} + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_pre_serialize() */ + static herr_t -H5C_epoch_marker_clear(H5F_t H5_ATTR_UNUSED * f, - void H5_ATTR_UNUSED * thing, - hbool_t H5_ATTR_UNUSED dest) +H5C__epoch_marker_serialize(const H5F_t H5_ATTR_UNUSED *f, void H5_ATTR_UNUSED *image_ptr, + size_t H5_ATTR_UNUSED len, void H5_ATTR_UNUSED *thing) { - herr_t ret_value = FAIL; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "called unreachable fcn.") + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ -done: + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - FUNC_LEAVE_NOAPI(ret_value) -} + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_serialize() */ + static herr_t -H5C_epoch_marker_notify(H5C_notify_action_t H5_ATTR_UNUSED action, +H5C__epoch_marker_notify(H5C_notify_action_t H5_ATTR_UNUSED action, void H5_ATTR_UNUSED * thing) { - herr_t ret_value = FAIL; /* Return value */ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_ENTER_NOAPI_NOINIT + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "called unreachable fcn.") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_notify() */ + static herr_t -H5C_epoch_marker_size(const H5F_t H5_ATTR_UNUSED * f, - const void H5_ATTR_UNUSED * thing, - size_t H5_ATTR_UNUSED * size_ptr) +H5C__epoch_marker_free_icr(void H5_ATTR_UNUSED * thing) { - herr_t ret_value = FAIL; /* Return value */ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_ENTER_NOAPI_NOINIT + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "called unreachable fcn.") - -done: - - FUNC_LEAVE_NOAPI(ret_value) -} + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_free_icr() */ -/*------------------------------------------------------------------------- - * Function: H5C_apply_candidate_list - * - * Purpose: Apply the supplied candidate list. - * - * We used to do this by simply having each process write - * every mpi_size-th entry in the candidate list, starting - * at index mpi_rank, and mark all the others clean. - * - * However, this can cause unnecessary contention in a file - * system by increasing the number of processes writing to - * adjacent locations in the HDF5 file. - * - * To attempt to minimize this, we now arange matters such - * that each process writes n adjacent entries in the - * candidate list, and marks all others clean. We must do - * this in such a fashion as to guarantee that each entry - * on the candidate list is written by exactly one process, - * and marked clean by all others. - * - * To do this, first construct a table mapping mpi_rank - * to the index of the first entry in the candidate list to - * be written by the process of that mpi_rank, and then use - * the table to control which entries are written and which - * are marked as clean as a function of the mpi_rank. - * - * Note that the table must be identical on all processes, as - * all see the same candidate list, mpi_size, and mpi_rank -- - * the inputs used to construct the table. - * - * We construct the table as follows. Let: - * - * n = num_candidates / mpi_size; - * - * m = num_candidates % mpi_size; - * - * Now allocate an array of integers of length mpi_size + 1, - * and call this array candidate_assignment_table. - * - * Conceptually, if the number of candidates is a multiple - * of the mpi_size, we simply pass through the candidate list - * and assign n entries to each process to flush, with the - * index of the first entry to flush in the location in - * the candidate_assignment_table indicated by the mpi_rank - * of the process. - * - * In the more common case in which the candidate list isn't - * isn't a multiple of the mpi_size, we pretend it is, and - * give num_candidates % mpi_size processes one extra entry - * each to make things work out. - * - * Once the table is constructed, we determine the first and - * last entry this process is to flush as follows: - * - * first_entry_to_flush = candidate_assignment_table[mpi_rank] - * - * last_entry_to_flush = - * candidate_assignment_table[mpi_rank + 1] - 1; - * - * With these values determined, we simply scan through the - * candidate list, marking all entries in the range - * [first_entry_to_flush, last_entry_to_flush] for flush, - * and all others to be cleaned. - * - * Finally, we scan the LRU from tail to head, flushing - * or marking clean the candidate entries as indicated. - * If necessary, we scan the pinned list as well. - * - * Note that this function will fail if any protected or - * clean entries appear on the candidate list. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -#define H5C_APPLY_CANDIDATE_LIST__DEBUG 0 -herr_t -H5C_apply_candidate_list(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - H5C_t * cache_ptr, - int num_candidates, - haddr_t * candidates_list_ptr, - int mpi_rank, - int mpi_size) -{ - hbool_t first_flush = FALSE; - int i; - int m; - int n; - int first_entry_to_flush; - int last_entry_to_flush; - int entries_to_clear = 0; - int entries_to_flush = 0; - int entries_to_flush_or_clear_last = 0; - int entries_to_flush_collectively = 0; - int entries_cleared = 0; - int entries_flushed = 0; - int entries_delayed = 0; - int entries_flushed_or_cleared_last = 0; - int entries_flushed_collectively = 0; - int entries_examined = 0; - int initial_list_len; - int * candidate_assignment_table = NULL; - haddr_t addr; - H5C_cache_entry_t * clear_ptr = NULL; - H5C_cache_entry_t * entry_ptr = NULL; - H5C_cache_entry_t * flush_ptr = NULL; - H5C_cache_entry_t * delayed_ptr = NULL; -#if H5C_DO_SANITY_CHECKS - haddr_t last_addr; -#endif /* H5C_DO_SANITY_CHECKS */ -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - char tbl_buf[1024]; -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HDassert( num_candidates > 0 ); - HDassert( num_candidates <= cache_ptr->slist_len ); - HDassert( candidates_list_ptr != NULL ); - HDassert( 0 <= mpi_rank ); - HDassert( mpi_rank < mpi_size ); - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: setting up candidate assignment table.\n", - FUNC, mpi_rank); - for ( i = 0; i < 1024; i++ ) tbl_buf[i] = '\0'; - sprintf(&(tbl_buf[0]), "candidate list = "); - for ( i = 0; i < num_candidates; i++ ) - { - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " 0x%llx", - (long long)(*(candidates_list_ptr + i))); - } - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); - HDfprintf(stdout, "%s", tbl_buf); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - n = num_candidates / mpi_size; - m = num_candidates % mpi_size; - HDassert(n >= 0); - - if(NULL == (candidate_assignment_table = (int *)H5MM_malloc(sizeof(int) * (size_t)(mpi_size + 1)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for candidate assignment table") - - candidate_assignment_table[0] = 0; - candidate_assignment_table[mpi_size] = num_candidates; - - if(m == 0) { /* mpi_size is an even divisor of num_candidates */ - HDassert(n > 0); - for(i = 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; - } /* end if */ - else { - for(i = 1; i <= m; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n + 1; - - if(num_candidates < mpi_size) { - for(i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = num_candidates; - } /* end if */ - else { - for(i = m + 1; i < mpi_size; i++) - candidate_assignment_table[i] = candidate_assignment_table[i - 1] + n; - } /* end else */ - } /* end else */ - HDassert((candidate_assignment_table[mpi_size - 1] + n) == num_candidates); - -#if H5C_DO_SANITY_CHECKS - /* verify that the candidate assignment table has the expected form */ - for ( i = 1; i < mpi_size - 1; i++ ) - { - int a, b; - - a = candidate_assignment_table[i] - candidate_assignment_table[i - 1]; - b = candidate_assignment_table[i + 1] - candidate_assignment_table[i]; - - HDassert( n + 1 >= a ); - HDassert( a >= b ); - HDassert( b >= n ); - } -#endif /* H5C_DO_SANITY_CHECKS */ - - first_entry_to_flush = candidate_assignment_table[mpi_rank]; - last_entry_to_flush = candidate_assignment_table[mpi_rank + 1] - 1; - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - for ( i = 0; i < 1024; i++ ) - tbl_buf[i] = '\0'; - sprintf(&(tbl_buf[0]), "candidate assignment table = "); - for(i = 0; i <= mpi_size; i++) - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), " %d", candidate_assignment_table[i]); - sprintf(&(tbl_buf[HDstrlen(tbl_buf)]), "\n"); - HDfprintf(stdout, "%s", tbl_buf); - - HDfprintf(stdout, "%s:%d: flush entries [%d, %d].\n", - FUNC, mpi_rank, first_entry_to_flush, last_entry_to_flush); - - HDfprintf(stdout, "%s:%d: marking entries.\n", FUNC, mpi_rank); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - for(i = 0; i < num_candidates; i++) { - addr = candidates_list_ptr[i]; - HDassert( H5F_addr_defined(addr) ); - -#if H5C_DO_SANITY_CHECKS - if ( i > 0 ) { - if ( last_addr == addr ) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Duplicate entry in cleaned list.\n") - } else if ( last_addr > addr ) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "candidate list not sorted.\n") - } - } - - last_addr = addr; -#endif /* H5C_DO_SANITY_CHECKS */ - - H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) - if(entry_ptr == NULL) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed candidate entry not in cache?!?!?.") - } else if(!entry_ptr->is_dirty) { - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry not dirty?!?!?.") - } else if ( entry_ptr->is_protected ) { - /* For now at least, we can't deal with protected entries. - * If we encounter one, scream and die. If it becomes an - * issue, we should be able to work around this. - */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Listed entry is protected?!?!?.") - } else { - /* determine whether the entry is to be cleared or flushed, - * and mark it accordingly. We will scan the protected and - * pinned list shortly, and clear or flush according to these - * markings. - */ - if((i >= first_entry_to_flush) && (i <= last_entry_to_flush)) { - entries_to_flush++; - entry_ptr->flush_immediately = TRUE; - } /* end if */ - else { - entries_to_clear++; - entry_ptr->clear_on_unprotect = TRUE; - } /* end else */ - } /* end else */ - } /* end for */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: num candidates/to clear/to flush = %d/%d/%d.\n", - FUNC, mpi_rank, (int)num_candidates, (int)entries_to_clear, - (int)entries_to_flush); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - - /* We have now marked all the entries on the candidate list for - * either flush or clear -- now scan the LRU and the pinned list - * for these entries and do the deed. - * - * Note that we are doing things in this round about manner so as - * to preserve the order of the LRU list to the best of our ability. - * If we don't do this, my experiments indicate that we will have a - * noticably poorer hit ratio as a result. - */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: scanning LRU list. len = %d.\n", FUNC, mpi_rank, - (int)(cache_ptr->LRU_list_len)); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* ===================================================================== * - * Now scan the LRU and PEL lists, flushing or clearing entries as - * needed. - * - * The flush_me_last and flush_me_collectively flags may dictate how or - * when some entries can be flushed, and should be addressed here. - * However, in their initial implementation, these flags only apply to the - * superblock, so there's only a relatively small change to this function - * to account for this one case where they come into play. If these flags - * are ever expanded upon, this function and the following flushing steps - * should be reworked to account for additional cases. - * ===================================================================== */ - - entries_examined = 0; - initial_list_len = cache_ptr->LRU_list_len; - entry_ptr = cache_ptr->LRU_tail_ptr; - - /* Examine each entry in the LRU list */ - while((entry_ptr != NULL) && (entries_examined <= initial_list_len) && - ((entries_cleared + entries_flushed) < num_candidates)) { - - /* If this process needs to clear this entry. */ - if(entry_ptr->clear_on_unprotect) { - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_cleared++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, - (long long)clear_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - clear_ptr->type, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } /* end if */ - - /* Else, if this process needs to flush this entry. */ - else if (entry_ptr->flush_immediately) { - - entry_ptr->flush_immediately = FALSE; - flush_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_flushed++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, - (long long)flush_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - flush_ptr->type, - flush_ptr->addr, - H5C__NO_FLAGS_SET, - &first_flush, - TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.") - } /* end else-if */ - - /* Otherwise, no action to be taken on this entry. Grab the next. */ - else { - entry_ptr = entry_ptr->prev; - } /* end else */ - - entries_examined++; - } /* end while */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: entries examined/cleared/flushed = %d/%d/%d.\n", - FUNC, mpi_rank, entries_examined, - entries_cleared, entries_flushed); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* It is also possible that some of the cleared entries are on the - * pinned list. Must scan that also. - */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, "%s:%d: scanning pinned entry list. len = %d\n", - FUNC, mpi_rank, (int)(cache_ptr->pel_len)); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - entry_ptr = cache_ptr->pel_head_ptr; - while((entry_ptr != NULL) && - ((entries_cleared + entries_flushed + entries_delayed) - < num_candidates)) { - - /* If entry is marked for flush or for clear */ - if((entry_ptr->clear_on_unprotect||entry_ptr->flush_immediately)) { - - /* If this entry needs to be flushed last */ - if (entry_ptr->flush_me_last) { - - /* At this time, only the superblock supports being - flushed last. Conveniently, it also happens to be the only - entry that supports being flushed collectively, as well. Also - conveniently, it's always pinned, so we only need to check - for it while scanning the PEL here. Finally, it's never - included in a candidate list that excludes other dirty - entries in a cache, so we can handle this relatively simple - case here. - - For now, this function asserts this and saves the entry - to flush it after scanning the rest of the PEL list. - - If there are ever more entries that either need to be - flushed last and/or flushed collectively, this whole routine - will need to be reworked to handle all additional cases. As - it is the simple case of a single pinned entry needing - flushed last and collectively is just a minor addition to - this routine, but signficantly buffing up the usage of - flush_me_last or flush_me_collectively will require a more - intense rework of this function and potentially the function - of candidate lists as a whole. */ - - HDassert(entry_ptr->flush_me_collectively); - entries_to_flush_or_clear_last++; - entries_to_flush_collectively++; - HDassert(entries_to_flush_or_clear_last == 1); - HDassert(entries_to_flush_collectively == 1); - - /* Delay the entry. It will be flushed later. */ - delayed_ptr = entry_ptr; - entries_delayed++; - HDassert(entries_delayed == 1); - - } /* end if */ - - /* Else, this process needs to clear this entry. */ - else if (entry_ptr->clear_on_unprotect) { - HDassert(!entry_ptr->flush_immediately); - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_cleared++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: clearing 0x%llx.\n", FUNC, mpi_rank, - (long long)clear_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - clear_ptr->type, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } /* end else-if */ - - /* Else, if this process needs to independently flush this entry. */ - else if (entry_ptr->flush_immediately) { - entry_ptr->flush_immediately = FALSE; - flush_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_flushed++; - -#if ( H5C_APPLY_CANDIDATE_LIST__DEBUG > 1 ) - HDfprintf(stdout, "%s:%d: flushing 0x%llx.\n", FUNC, mpi_rank, - (long long)flush_ptr->addr); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - if(H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - flush_ptr->type, - flush_ptr->addr, - H5C__NO_FLAGS_SET, - &first_flush, - TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't flush entry.") - } /* end else-if */ - } /* end if */ - - /* Otherwise, this entry is not marked for flush or clear. Grab the next. */ - else { - entry_ptr = entry_ptr->next; - } /* end else */ - - } /* end while */ - -#if H5C_APPLY_CANDIDATE_LIST__DEBUG - HDfprintf(stdout, - "%s:%d: pel entries examined/cleared/flushed = %d/%d/%d.\n", - FUNC, mpi_rank, entries_examined, - entries_cleared, entries_flushed); - HDfprintf(stdout, "%s:%d: done.\n", FUNC, mpi_rank); - - HDfsync(stdout); -#endif /* H5C_APPLY_CANDIDATE_LIST__DEBUG */ - - /* ====================================================================== * - * Now, handle all delayed entries. * - * * - * This can *only* be the superblock at this time, so it's relatively * - * easy to deal with. We're collectively flushing the entry saved from * - * above. This will need to be handled differently if there are ever more * - * than one entry needing this special treatment.) * - * ====================================================================== */ - - if (delayed_ptr) { - - if (delayed_ptr->clear_on_unprotect) { - entry_ptr->clear_on_unprotect = FALSE; - entries_cleared++; - } else if (delayed_ptr->flush_immediately) { - entry_ptr->flush_immediately = FALSE; - entries_flushed++; - } /* end if */ - - if(H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - delayed_ptr->type, - delayed_ptr->addr, - H5C__NO_FLAGS_SET, - &first_flush, - TRUE) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, - "Can't flush entry collectively.") - - entries_flushed_collectively++; - entries_flushed_or_cleared_last++; - } /* end if */ - - /* ====================================================================== * - * Finished flushing everything. * - * ====================================================================== */ - - HDassert((entries_flushed == entries_to_flush)); - HDassert((entries_cleared == entries_to_clear)); - HDassert((entries_flushed_or_cleared_last == entries_to_flush_or_clear_last)); - HDassert((entries_flushed_collectively == entries_to_flush_collectively)); - - if((entries_flushed != entries_to_flush) || - (entries_cleared != entries_to_clear) || - (entries_flushed_or_cleared_last != entries_to_flush_or_clear_last) || - (entries_flushed_collectively != entries_to_flush_collectively)) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry count mismatch.") - -done: - if(candidate_assignment_table != NULL) - candidate_assignment_table = (int *)H5MM_xfree((void *)candidate_assignment_table); - - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_apply_candidate_list() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- - * Function: H5C_construct_candidate_list__clean_cache - * - * Purpose: Construct the list of entries that should be flushed to - * clean all entries in the cache. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_construct_candidate_list__clean_cache(H5C_t * cache_ptr) +static herr_t +H5C__epoch_marker_clear(const H5F_t H5_ATTR_UNUSED *f, void H5_ATTR_UNUSED * thing, hbool_t H5_ATTR_UNUSED about_to_destroy) { - size_t space_needed; - herr_t ret_value = SUCCEED; /* Return value */ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_ENTER_NOAPI(FAIL) + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - - /* As a sanity check, set space needed to the size of the skip list. - * This should be the sum total of the sizes of all the dirty entries - * in the metadata cache. - */ - space_needed = cache_ptr->slist_size; - - /* Recall that while we shouldn't have any protected entries at this - * point, it is possible that some dirty entries may reside on the - * pinned list at this point. - */ - HDassert( cache_ptr->slist_size <= - (cache_ptr->dLRU_list_size + cache_ptr->pel_size) ); - HDassert( cache_ptr->slist_len <= - (cache_ptr->dLRU_list_len + cache_ptr->pel_len) ); - - if(space_needed > 0) { /* we have work to do */ - H5C_cache_entry_t *entry_ptr; - int nominated_entries_count = 0; - size_t nominated_entries_size = 0; - haddr_t nominated_addr; - - HDassert( cache_ptr->slist_len > 0 ); - - /* Scan the dirty LRU list from tail forward and nominate sufficient - * entries to free up the necessary space. - */ - entry_ptr = cache_ptr->dLRU_tail_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL)) { - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(1).") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - entry_ptr = entry_ptr->aux_prev; - } /* end while */ - HDassert( entry_ptr == NULL ); - - /* it is possible that there are some dirty entries on the - * protected entry list as well -- scan it too if necessary - */ - entry_ptr = cache_ptr->pel_head_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL)) { - if(entry_ptr->is_dirty) { - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed(2).") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - } /* end if */ - - entry_ptr = entry_ptr->next; - } /* end while */ - - HDassert( nominated_entries_count == cache_ptr->slist_len ); - HDassert( nominated_entries_size == space_needed ); - } /* end if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_construct_candidate_list__clean_cache() */ -#endif /* H5_HAVE_PARALLEL */ + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_clear() */ -/*------------------------------------------------------------------------- - * Function: H5C_construct_candidate_list__min_clean - * - * Purpose: Construct the list of entries that should be flushed to - * get the cache back within its min clean constraints. - * - * This function is used in managing sync points, and - * shouldn't be used elsewhere. - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 3/17/10 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_construct_candidate_list__min_clean(H5C_t * cache_ptr) +static herr_t +H5C__epoch_marker_fsf_size(const void H5_ATTR_UNUSED * thing, size_t H5_ATTR_UNUSED *fsf_size_ptr) { - size_t space_needed = 0; - herr_t ret_value = SUCCEED; /* Return value */ + FUNC_ENTER_STATIC_NOERR /* Yes, even though this pushes an error on the stack */ - FUNC_ENTER_NOAPI(FAIL) + HERROR(H5E_CACHE, H5E_SYSTEM, "called unreachable fcn."); - HDassert( cache_ptr != NULL ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + FUNC_LEAVE_NOAPI(FAIL) +} /* end H5C__epoch_marker_fsf_size() */ - /* compute the number of bytes (if any) that must be flushed to get the - * cache back within its min clean constraints. - */ - if(cache_ptr->max_cache_size > cache_ptr->index_size) { - if(((cache_ptr->max_cache_size - cache_ptr->index_size) + - cache_ptr->cLRU_list_size) >= cache_ptr->min_clean_size) - space_needed = 0; - else - space_needed = cache_ptr->min_clean_size - - ((cache_ptr->max_cache_size - cache_ptr->index_size) + - cache_ptr->cLRU_list_size); - } /* end if */ - else { - if(cache_ptr->min_clean_size <= cache_ptr->cLRU_list_size) - space_needed = 0; - else - space_needed = cache_ptr->min_clean_size - - cache_ptr->cLRU_list_size; - } /* end else */ - - if(space_needed > 0) { /* we have work to do */ - H5C_cache_entry_t *entry_ptr; - int nominated_entries_count = 0; - size_t nominated_entries_size = 0; - - HDassert( cache_ptr->slist_len > 0 ); - - /* Scan the dirty LRU list from tail forward and nominate sufficient - * entries to free up the necessary space. - */ - entry_ptr = cache_ptr->dLRU_tail_ptr; - while((nominated_entries_size < space_needed) && - (nominated_entries_count < cache_ptr->slist_len) && - (entry_ptr != NULL) && - (!entry_ptr->flush_me_last)) { - haddr_t nominated_addr; - - HDassert( ! (entry_ptr->is_protected) ); - HDassert( ! (entry_ptr->is_read_only) ); - HDassert( entry_ptr->ro_ref_count == 0 ); - HDassert( entry_ptr->is_dirty ); - HDassert( entry_ptr->in_slist ); - - nominated_addr = entry_ptr->addr; - if(H5AC_add_candidate((H5AC_t *)cache_ptr, nominated_addr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "H5AC_add_candidate() failed.") - - nominated_entries_size += entry_ptr->size; - nominated_entries_count++; - entry_ptr = entry_ptr->aux_prev; - } /* end while */ - HDassert( nominated_entries_count <= cache_ptr->slist_len ); - HDassert( nominated_entries_size >= space_needed ); - } /* end if */ - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_construct_candidate_list__min_clean() */ -#endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- @@ -1184,6 +499,9 @@ H5C_create(size_t max_cache_size, /* Tagging Field Initializations */ cache_ptr->ignore_tags = FALSE; + cache_ptr->slist_changed = FALSE; + cache_ptr->slist_change_in_pre_serialize = FALSE; + cache_ptr->slist_change_in_serialize = FALSE; cache_ptr->slist_len = 0; cache_ptr->slist_size = (size_t)0; @@ -1197,6 +515,9 @@ H5C_create(size_t max_cache_size, (cache_ptr->index)[i] = NULL; } + cache_ptr->entries_removed_counter = 0; + cache_ptr->last_entry_removed_ptr = NULL; + cache_ptr->pl_len = 0; cache_ptr->pl_size = (size_t)0; cache_ptr->pl_head_ptr = NULL; @@ -1271,11 +592,8 @@ H5C_create(size_t max_cache_size, /* Set non-zero/FALSE/NULL fields for epoch markers */ for ( i = 0; i < H5C__MAX_EPOCH_MARKERS; i++ ) { - (cache_ptr->epoch_marker_active)[i] = FALSE; -#ifndef NDEBUG ((cache_ptr->epoch_markers)[i]).magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; -#endif /* NDEBUG */ ((cache_ptr->epoch_markers)[i]).addr = (haddr_t)i; ((cache_ptr->epoch_markers)[i]).type = &epoch_marker_class; } @@ -1530,9 +848,7 @@ H5C_def_auto_resize_rpt_fcn(H5C_t * cache_ptr, *------------------------------------------------------------------------- */ herr_t -H5C_dest(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id) +H5C_dest(H5F_t * f, hid_t dxpl_id) { H5C_t * cache_ptr = f->shared->cache; herr_t ret_value = SUCCEED; /* Return value */ @@ -1544,8 +860,7 @@ H5C_dest(H5F_t * f, HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); /* Flush and invalidate all cache entries */ - if(H5C_flush_invalidate_cache(f, primary_dxpl_id, secondary_dxpl_id, - H5C__NO_FLAGS_SET) < 0 ) + if(H5C_flush_invalidate_cache(f, dxpl_id, H5C__NO_FLAGS_SET) < 0 ) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush cache") if(cache_ptr->slist_ptr != NULL) { @@ -1570,7 +885,9 @@ H5C_dest(H5F_t * f, #endif /* H5C_DO_SANITY_CHECKS */ #endif /* NDEBUG */ +#ifndef NDEBUG cache_ptr->magic = 0; +#endif /* NDEBUG */ cache_ptr = H5FL_FREE(H5C_t, cache_ptr); @@ -1595,17 +912,16 @@ done: *------------------------------------------------------------------------- */ herr_t -H5C_expunge_entry(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - const H5C_class_t * type, - haddr_t addr, - unsigned flags) +H5C_expunge_entry(H5F_t *f, hid_t dxpl_id, const H5C_class_t *type, + haddr_t addr, unsigned flags) { H5C_t * cache_ptr; - herr_t result; - hbool_t first_flush = TRUE; H5C_cache_entry_t * entry_ptr = NULL; + unsigned flush_flags = (H5C__FLUSH_INVALIDATE_FLAG | H5C__FLUSH_CLEAR_ONLY_FLAG); +#if H5C_DO_SANITY_CHECKS + hbool_t entry_was_dirty; + hsize_t entry_size; +#endif /* H5C_DO_SANITY_CHECKS */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_NOAPI(FAIL) @@ -1616,8 +932,6 @@ H5C_expunge_entry(H5F_t * f, HDassert(cache_ptr); HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); HDassert(type); - HDassert(type->clear); - HDassert(type->dest); HDassert(H5F_addr_defined(addr)); #if H5C_DO_EXTREME_SANITY_CHECKS @@ -1641,27 +955,37 @@ H5C_expunge_entry(H5F_t * f, if(entry_ptr->is_pinned) HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "Target entry is pinned.") - /* Pass along 'free file space' flag to cache client */ - entry_ptr->free_file_space_on_destroy = ( (flags & H5C__FREE_FILE_SPACE_FLAG) != 0 ); - - /* If we get this far, call H5C_flush_single_entry() with the + /* If we get this far, call H5C__flush_single_entry() with the * H5C__FLUSH_INVALIDATE_FLAG and the H5C__FLUSH_CLEAR_ONLY_FLAG. * This will clear the entry, and then delete it from the cache. */ - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__FLUSH_INVALIDATE_FLAG | H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE); - if ( result < 0 ) { + /* Pass along 'free file space' flag to cache client. */ + flush_flags |= (flags & H5C__FREE_FILE_SPACE_FLAG); + +#if H5C_DO_SANITY_CHECKS + entry_was_dirty = entry_ptr->is_dirty; + entry_size = entry_ptr->size; +#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ + + /* Delete the entry from the skip list on destroy */ + flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG; + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, "can't flush entry") - HGOTO_ERROR(H5E_CACHE, H5E_CANTEXPUNGE, FAIL, \ - "H5C_flush_single_entry() failed.") +#if H5C_DO_SANITY_CHECKS + if ( entry_was_dirty ) + { + /* we have just removed an entry from the skip list. Thus + * we must touch up cache_ptr->slist_len_increase and + * cache_ptr->slist_size_increase to keep from skewing + * the sanity checks on flushes. + */ + cache_ptr->slist_len_increase -= 1; + cache_ptr->slist_size_increase -= (int64_t)(entry_size); } +#endif /* H5C_DO_SANITY_CHECKS */ done: #if H5C_DO_EXTREME_SANITY_CHECKS @@ -1698,20 +1022,33 @@ done: * Programmer: John Mainzer * 6/2/04 * + * Changes: Modified function to test for slist chamges in + * pre_serialize and serialize callbacks, and re-start + * scans through the slist when such changes occur. + * + * This has been a potential problem for some time, + * and there has been code in this function to deal + * with elements of this issue. However the shift + * to the V3 cache in combination with the activities + * of some of the cache clients (in particular the + * free space manager and the fractal heap) have + * made this re-work necessary. + * + * JRM -- 12/13/14 + * *------------------------------------------------------------------------- */ herr_t -H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsigned flags) +H5C_flush_cache(H5F_t *f, hid_t dxpl_id, unsigned flags) { H5C_t * cache_ptr = f->shared->cache; - herr_t status; herr_t ret_value = SUCCEED; hbool_t destroy; hbool_t flushed_entries_last_pass; hbool_t flush_marked_entries; - hbool_t first_flush = TRUE; hbool_t ignore_protected; hbool_t tried_to_flush_protected_entry = FALSE; + hbool_t restart_slist_scan; int32_t passes = 0; int32_t protected_entries = 0; H5SL_node_t * node_ptr = NULL; @@ -1722,6 +1059,10 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign int64_t flushed_entries_size = 0; int64_t initial_slist_len = 0; size_t initial_slist_size = 0; + int64_t entry_size_change; + int64_t * entry_size_change_ptr = &entry_size_change; +#else /* H5C_DO_SANITY_CHECKS */ + int64_t * entry_size_change_ptr = NULL; #endif /* H5C_DO_SANITY_CHECKS */ FUNC_ENTER_NOAPI(FAIL) @@ -1757,20 +1098,8 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign cache_ptr->flush_in_progress = TRUE; if ( destroy ) { - - status = H5C_flush_invalidate_cache(f, - primary_dxpl_id, - secondary_dxpl_id, - flags); - - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, we are toast so - * just scream and die. - */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "flush invalidate failed.") - } + if(H5C_flush_invalidate_cache(f, dxpl_id, flags) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "flush invalidate failed.") } else { /* When we are only flushing marked entries, the slist will usually * still contain entries when we have flushed everything we should. @@ -1780,6 +1109,18 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign flushed_entries_last_pass = TRUE; + /* set the cache_ptr->slist_change_in_pre_serialize and + * cache_ptr->slist_change_in_serialize to false. + * + * These flags are set to TRUE by H5C__flush_single_entry if the + * slist is modified by a pre_serialize or serialize call respectively. + * H5C_flush_cache uses these flags to detect any modifications + * to the slist that might corrupt the scan of the slist -- and + * restart the scan in this event. + */ + cache_ptr->slist_change_in_pre_serialize = FALSE; + cache_ptr->slist_change_in_serialize = FALSE; + while ( ( passes < H5C__MAX_PASSES_ON_FLUSH ) && ( cache_ptr->slist_len != 0 ) && ( protected_entries == 0 ) && @@ -1797,18 +1138,6 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign { hbool_t flushed_during_dep_loop = FALSE; - /* Start at beginning of skip list each time */ - node_ptr = H5SL_first(cache_ptr->slist_ptr); - HDassert( node_ptr != NULL ); - - /* Get cache entry for this node */ - next_entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); - if ( NULL == next_entry_ptr ) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "next_entry_ptr == NULL ?!?!") - HDassert( next_entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); - HDassert( next_entry_ptr->is_dirty ); - HDassert( next_entry_ptr->in_slist ); - #if H5C_DO_SANITY_CHECKS /* For sanity checking, try to verify that the skip list has * the expected size and number of entries at the end of each @@ -1818,8 +1147,10 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign * or may not flush all the entries in the slist. * * To make things more entertaining, with the advent of the - * fractal heap, the entry flush callback can cause entries - * to be dirtied, resized, and/or moved. + * fractal heap, the entry serialize callback can cause entries + * to be dirtied, resized, and/or moved. Also, the + * pre_serialize callback can result in an entry being + * removed from the cache via the take ownership flag. * * To deal with this, we first make note of the initial * skip list length and size: @@ -1834,7 +1165,8 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign flushed_entries_size = 0; /* As mentioned above, there is the possibility that - * entries will be dirtied, resized, and/or flushed during + * entries will be dirtied, resized, flushed, or removed + * from the cache via the take ownership flag during * our pass through the skip list. To capture the number * of entries added, and the skip list size delta, * zero the slist_len_increase and slist_size_increase of @@ -1851,53 +1183,60 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign */ #endif /* H5C_DO_SANITY_CHECKS */ - while ( node_ptr != NULL ) + restart_slist_scan = TRUE; + + while ( ( restart_slist_scan ) || ( node_ptr != NULL ) ) { - entry_ptr = next_entry_ptr; + if ( restart_slist_scan ) + { + restart_slist_scan = FALSE; - /* With the advent of the fractal heap, it is possible - * that the flush callback will dirty and/or resize - * other entries in the cache. In particular, while - * Quincey has promised me that this will never happen, - * it is possible that the flush callback for an - * entry may protect an entry that is not in the cache, - * perhaps causing the cache to flush and possibly - * evict the entry associated with node_ptr to make - * space for the new entry. - * - * Thus we do a bit of extra sanity checking on entry_ptr, - * and break out of this scan of the skip list if we - * detect minor problems. We have a bit of leaway on the - * number of passes though the skip list, so this shouldn't - * be an issue in the flush in and of itself, as it should - * be all but impossible for this to happen more than once - * in any flush. - * - * Observe that that breaking out of the scan early - * shouldn't break the sanity checks just after the end - * of this while loop. - * - * If an entry has merely been marked clean and removed from - * the s-list, we simply break out of the scan. - * - * If the entry has been evicted, we flag an error and - * exit. - */ -#ifndef NDEBUG - if ( entry_ptr->magic != H5C__H5C_CACHE_ENTRY_T_MAGIC ) { + /* Start at beginning of skip list */ + node_ptr = H5SL_first(cache_ptr->slist_ptr); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry_ptr->magic is invalid ?!?!") + if ( node_ptr == NULL ) + { + /* the slist is empty -- break out of inner loop */ + break; + } + HDassert( node_ptr != NULL ); - } else -#endif /* NDEBUG */ - if ( ( ! entry_ptr->is_dirty ) || - ( ! entry_ptr->in_slist ) ) { + /* Get cache entry for this node */ + next_entry_ptr = + (H5C_cache_entry_t *)H5SL_item(node_ptr); - /* the s-list has been modified out from under us. - * break out of the loop. - */ - goto end_of_inner_loop;; + if(NULL == next_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "next_entry_ptr == NULL ?!?!") + + HDassert( next_entry_ptr->magic == \ + H5C__H5C_CACHE_ENTRY_T_MAGIC ); + HDassert( next_entry_ptr->is_dirty ); + HDassert( next_entry_ptr->in_slist ); } + + entry_ptr = next_entry_ptr; + + /* With the advent of the fractal heap, the free space + * manager, and the version 3 cache, it is possible + * that the pre-serialize or serialize callback will + * dirty, resize, or take ownership of other entries + * in the cache. + * + * To deal with this, I have inserted code to detect any + * change in the skip list not directly under the control + * of this function. If such modifications are detected, + * we must re-start the scan of the skip list to avoid + * the possibility that the target of the next_entry_ptr + * may have been flushed or deleted from the cache. + * + * To verify that all such possibilities have been dealt + * with, we do a bit of extra sanity checking on + * entry_ptr. + */ + HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); + HDassert(entry_ptr->in_slist); + HDassert(entry_ptr->is_dirty); /* increment node pointer now, before we delete its target * from the slist. @@ -1911,6 +1250,7 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign HDassert( next_entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); HDassert( next_entry_ptr->is_dirty ); HDassert( next_entry_ptr->in_slist ); + HDassert( entry_ptr != next_entry_ptr ); } else { next_entry_ptr = NULL; } @@ -1927,9 +1267,9 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign if ( entry_ptr->is_protected ) { - /* we probably have major problems -- but lets flush - * everything we can before we decide whether to flag - * an error. + /* we probably have major problems -- but lets + * flush everything we can before we decide + * whether to flag an error. */ tried_to_flush_protected_entry = TRUE; protected_entries++; @@ -1944,24 +1284,39 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign #if H5C_DO_SANITY_CHECKS flushed_entries_count++; flushed_entries_size += (int64_t)entry_ptr->size; + entry_size_change = 0; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - NULL, - entry_ptr->addr, - flags, - &first_flush, - FALSE); - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, - * we are toast so just scream and die. - */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "dirty pinned entry flush failed.") - } /* end if */ + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.") + +#if H5C_DO_SANITY_CHECKS + /* it is possible that the entry size changed + * during flush -- update flushed_entries_size + * to account for this. + */ + flushed_entries_size += entry_size_change; +#endif /* H5C_DO_SANITY_CHECKS */ + flushed_during_dep_loop = TRUE; + + if ((cache_ptr->slist_change_in_serialize) || + (cache_ptr->slist_change_in_pre_serialize)) + { + /* The slist has been modified by something + * other than the simple removal of the + * of the flushed entry after the flush. + * + * This has the potential to corrupt the + * scan through the slist, so restart it. + */ + restart_slist_scan = TRUE; + cache_ptr->slist_change_in_pre_serialize + = FALSE; + cache_ptr->slist_change_in_serialize + = FALSE; + + H5C__UPDATE_STATS_FOR_SLIST_SCAN_RESTART(cache_ptr) + } } /* end if */ else if(entry_ptr->flush_dep_height < curr_flush_dep_height) /* This shouldn't happen -- if it does, just scream and die. */ @@ -1977,31 +1332,46 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign #if H5C_DO_SANITY_CHECKS flushed_entries_count++; flushed_entries_size += (int64_t)entry_ptr->size; + entry_size_change = 0; +#endif /* H5C_DO_SANITY_CHECKS */ + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flags, entry_size_change_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush entry.") + +#if H5C_DO_SANITY_CHECKS + /* it is possible that the entry size changed + * during flush -- update flushed_entries_size + * to account for this. + */ + flushed_entries_size += entry_size_change; #endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - NULL, - entry_ptr->addr, - flags, - &first_flush, - FALSE); - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, - * we are toast so just scream and die. + + flushed_during_dep_loop = TRUE; + + if ((cache_ptr->slist_change_in_serialize) || + (cache_ptr->slist_change_in_pre_serialize)) + { + /* The slist has been modified by something + * other than the simple removal of the + * of the flushed entry after the flush. + * + * This has the potential to corrupt the + * scan through the slist, so restart it. */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "Can't flush entry.") + restart_slist_scan = TRUE; + cache_ptr->slist_change_in_pre_serialize + = FALSE; + cache_ptr->slist_change_in_serialize + = FALSE; + + H5C__UPDATE_STATS_FOR_SLIST_SCAN_RESTART(cache_ptr) } - flushed_during_dep_loop = TRUE; } /* end if */ else if(entry_ptr->flush_dep_height < curr_flush_dep_height) /* This shouldn't happen -- if it does, just scream and die. */ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty entry below current flush dep. height.") } /* end else */ } /* end if */ - } /* while ( node_ptr != NULL ) */ + } /* while ( ( restart_slist_scan ) || ( node_ptr != NULL ) ) */ /* Check for incrementing flush dependency height */ if(flushed_during_dep_loop) { @@ -2020,7 +1390,8 @@ H5C_flush_cache(H5F_t *f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, unsign curr_flush_dep_height++; } /* while ( curr_flush_dep_height <= H5C__NUM_FLUSH_DEP_HEIGHTS) */ -end_of_inner_loop: + + passes++; #if H5C_DO_SANITY_CHECKS /* Verify that the slist size and length are as expected. */ @@ -2032,8 +1403,6 @@ end_of_inner_loop: flushed_entries_size) == cache_ptr->slist_size ); #endif /* H5C_DO_SANITY_CHECKS */ - passes++; - } /* while */ HDassert( protected_entries <= cache_ptr->pl_len ); @@ -2098,12 +1467,10 @@ done: */ herr_t H5C_flush_to_min_clean(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id) + hid_t dxpl_id) { H5C_t * cache_ptr; herr_t result; - hbool_t first_flush = TRUE; hbool_t write_permitted; #if 0 /* modified code -- commented out for now */ int i; @@ -2146,11 +1513,9 @@ H5C_flush_to_min_clean(H5F_t * f, } #if 1 /* original code */ result = H5C_make_space_in_cache(f, - primary_dxpl_id, - secondary_dxpl_id, + dxpl_id, (size_t)0, - write_permitted, - &first_flush); + write_permitted); if ( result < 0 ) { @@ -2559,6 +1924,34 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_get_aux_ptr + * + * Purpose: Get the aux_ptr field from the cache. + * + * This field will either be NULL (when accessing a file serially) + * or contains a pointer to the auxiliary info for parallel I/O. + * + * Return: NULL/non-NULL (can't fail) + * + * Programmer: Quincey Koziol + * 6/29/15 + * + *------------------------------------------------------------------------- + */ +void * +H5C_get_aux_ptr(const H5C_t *cache_ptr) +{ + FUNC_ENTER_NOAPI_NOERR + + /* Check arguments */ + HDassert(cache_ptr); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + + FUNC_LEAVE_NOAPI(cache_ptr->aux_ptr) +} /* H5C_get_aux_ptr() */ + + +/*------------------------------------------------------------------------- * Function: H5C_get_trace_file_ptr * * Purpose: Get the trace_file_ptr field from the cache. @@ -2645,8 +2038,7 @@ H5C_get_trace_file_ptr_from_entry(const H5C_cache_entry_t *entry_ptr) */ herr_t H5C_insert_entry(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, + hid_t dxpl_id, const H5C_class_t * type, haddr_t addr, void * thing, @@ -2654,7 +2046,6 @@ H5C_insert_entry(H5F_t * f, { H5C_t * cache_ptr; herr_t result; - hbool_t first_flush = TRUE; hbool_t insert_pinned; hbool_t flush_last; #ifdef H5_HAVE_PARALLEL @@ -2678,8 +2069,6 @@ H5C_insert_entry(H5F_t * f, HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); HDassert( type ); - HDassert( type->flush ); - HDassert( type->size ); HDassert( H5F_addr_defined(addr) ); HDassert( thing ); @@ -2718,15 +2107,16 @@ H5C_insert_entry(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTINS, FAIL, "duplicate entry in cache.") } /* end if */ -#ifndef NDEBUG entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; -#endif /* NDEBUG */ entry_ptr->cache_ptr = cache_ptr; entry_ptr->addr = addr; entry_ptr->type = type; + entry_ptr->image_ptr = NULL; + entry_ptr->image_up_to_date = FALSE; + /* Apply tag to newly inserted entry */ - if(H5C_tag_entry(cache_ptr, entry_ptr, primary_dxpl_id) < 0) + if(H5C_tag_entry(cache_ptr, entry_ptr, dxpl_id) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTTAG, FAIL, "Cannot tag metadata entry") entry_ptr->is_protected = FALSE; @@ -2747,10 +2137,27 @@ H5C_insert_entry(H5F_t * f, /* not protected, so can't be dirtied */ entry_ptr->dirtied = FALSE; - /* Retrieve the size of the thing */ - if((type->size)(f, thing, &(entry_ptr->size)) < 0) + /* Retrieve the size of the thing. Set the compressed field to FALSE + * and the compressed_size field to zero first, as they may not be + * initialized by the image_len call. + */ + entry_ptr->compressed = FALSE; + entry_ptr->compressed_size = 0; + if((type->image_len)(thing, &(entry_ptr->size), &(entry_ptr->compressed), + &(entry_ptr->compressed_size)) < 0) HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, "Can't get size of thing") HDassert(entry_ptr->size > 0 && entry_ptr->size < H5C_MAX_ENTRY_SIZE); + HDassert(((type->flags & H5C__CLASS_COMPRESSED_FLAG) != 0) || + (entry_ptr->compressed == FALSE)); + + /* entry has just been inserted -- thus compressed size cannot have + * been computed yet. Thus if entry_ptr->compressed is TRUE, + * entry_ptr->size must equal entry_ptr->compressed_size. + */ + HDassert((entry_ptr->compressed == FALSE) || + (entry_ptr->size == entry_ptr->compressed_size)); + HDassert((entry_ptr->compressed == TRUE) || + (entry_ptr->compressed_size == 0)); entry_ptr->in_slist = FALSE; @@ -2761,7 +2168,6 @@ H5C_insert_entry(H5F_t * f, entry_ptr->flush_in_progress = FALSE; entry_ptr->destroy_in_progress = FALSE; - entry_ptr->free_file_space_on_destroy = FALSE; /* Initialize flush dependency height fields */ entry_ptr->flush_dep_parent = NULL; @@ -2860,11 +2266,9 @@ H5C_insert_entry(H5F_t * f, */ result = H5C_make_space_in_cache(f, - primary_dxpl_id, - secondary_dxpl_id, + dxpl_id, space_needed, - write_permitted, - &first_flush); + write_permitted); if ( result < 0 ) { @@ -2928,301 +2332,6 @@ done: /*------------------------------------------------------------------------- - * - * Function: H5C_mark_entries_as_clean - * - * Purpose: When the H5C code is used to implement the metadata caches - * in PHDF5, only the cache with MPI_rank 0 is allowed to - * actually write entries to disk -- all other caches must - * retain dirty entries until they are advised that the - * entries are clean. - * - * This function exists to allow the H5C code to receive these - * notifications. - * - * The function receives a list of entry base addresses - * which must refer to dirty entries in the cache. If any - * of the entries are either clean or don't exist, the - * function flags an error. - * - * The function scans the list of entries and flushes all - * those that are currently unprotected with the - * H5C__FLUSH_CLEAR_ONLY_FLAG. Those that are currently - * protected are flagged for clearing when they are - * unprotected. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 7/5/05 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -herr_t -H5C_mark_entries_as_clean(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - int32_t ce_array_len, - haddr_t * ce_array_ptr) -{ - H5C_t * cache_ptr; - hbool_t first_flush = TRUE; - int entries_cleared; - int entries_examined; - int i; - int initial_list_len; - haddr_t addr; -#if H5C_DO_SANITY_CHECKS - int pinned_entries_marked = 0; - int protected_entries_marked = 0; - int other_entries_marked = 0; - haddr_t last_addr; -#endif /* H5C_DO_SANITY_CHECKS */ - H5C_cache_entry_t * clear_ptr = NULL; - H5C_cache_entry_t * entry_ptr = NULL; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI(FAIL) - - HDassert( f ); - HDassert( f->shared ); - cache_ptr = f->shared->cache; - HDassert( cache_ptr ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - - HDassert( ce_array_len > 0 ); - HDassert( ce_array_ptr != NULL ); - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed on entry.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ - - for ( i = 0; i < ce_array_len; i++ ) - { - addr = ce_array_ptr[i]; - -#if H5C_DO_SANITY_CHECKS - if ( i == 0 ) { - - last_addr = addr; - - } else { - - if ( last_addr == addr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Duplicate entry in cleaned list.\n"); - - } else if ( last_addr > addr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "cleaned list not sorted.\n"); - } - } - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed in for loop.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ -#endif /* H5C_DO_SANITY_CHECKS */ - - HDassert( H5F_addr_defined(addr) ); - - H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) - - if ( entry_ptr == NULL ) { -#if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, - "H5C_mark_entries_as_clean: entry[%d] = %ld not in cache.\n", - (int)i, - (long)addr); -#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Listed entry not in cache?!?!?.") - - } else if ( ! entry_ptr->is_dirty ) { - -#if H5C_DO_SANITY_CHECKS - HDfprintf(stdout, - "H5C_mark_entries_as_clean: entry %ld is not dirty!?!\n", - (long)addr); -#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Listed entry not dirty?!?!?.") -#if 0 /* original code */ - } else if ( entry_ptr->is_protected ) { - - entry_ptr->clear_on_unprotect = TRUE; - - } else { - - if ( H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } - } -#else /* modified code */ - } else { - /* Mark the entry to be cleared on unprotect. We will - * scan the LRU list shortly, and clear all those entries - * not currently protected. - */ - entry_ptr->clear_on_unprotect = TRUE; -#if H5C_DO_SANITY_CHECKS - if ( entry_ptr->is_protected ) { - - protected_entries_marked++; - - } else if ( entry_ptr->is_pinned ) { - - pinned_entries_marked++; - - } else { - - other_entries_marked++; - } -#endif /* H5C_DO_SANITY_CHECKS */ - } -#endif /* end modified code */ - } -#if 1 /* modified code */ - /* Scan through the LRU list from back to front, and flush the - * entries whose clear_on_unprotect flags are set. Observe that - * any protected entries will not be on the LRU, and therefore - * will not be flushed at this time. - */ - - entries_cleared = 0; - entries_examined = 0; - initial_list_len = cache_ptr->LRU_list_len; - entry_ptr = cache_ptr->LRU_tail_ptr; - - while ( ( entry_ptr != NULL ) && - ( entries_examined <= initial_list_len ) && - ( entries_cleared < ce_array_len ) ) - { - if ( entry_ptr->clear_on_unprotect ) { - - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->prev; - entries_cleared++; - - if ( H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - clear_ptr->type, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } - } else { - - entry_ptr = entry_ptr->prev; - } - entries_examined++; - } - -#if H5C_DO_SANITY_CHECKS - HDassert( entries_cleared == other_entries_marked ); -#endif /* H5C_DO_SANITY_CHECKS */ - - /* It is also possible that some of the cleared entries are on the - * pinned list. Must scan that also. - */ - - entry_ptr = cache_ptr->pel_head_ptr; - - while ( entry_ptr != NULL ) - { - if ( entry_ptr->clear_on_unprotect ) { - - entry_ptr->clear_on_unprotect = FALSE; - clear_ptr = entry_ptr; - entry_ptr = entry_ptr->next; - entries_cleared++; - - if ( H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - clear_ptr->type, - clear_ptr->addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &first_flush, - TRUE) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Can't clear entry.") - } - } else { - - entry_ptr = entry_ptr->next; - } - } - -#if H5C_DO_SANITY_CHECKS - HDassert( entries_cleared == pinned_entries_marked + other_entries_marked ); - HDassert( entries_cleared + protected_entries_marked == ce_array_len ); -#endif /* H5C_DO_SANITY_CHECKS */ - - HDassert( ( entries_cleared == ce_array_len ) || - ( (ce_array_len - entries_cleared) <= cache_ptr->pl_len ) ); - -#if H5C_DO_SANITY_CHECKS - i = 0; - entry_ptr = cache_ptr->pl_head_ptr; - while ( entry_ptr != NULL ) - { - if ( entry_ptr->clear_on_unprotect ) { - - i++; - } - entry_ptr = entry_ptr->next; - } - HDassert( (entries_cleared + i) == ce_array_len ); -#endif /* H5C_DO_SANITY_CHECKS */ -#endif /* modified code */ - -done: - -#if H5C_DO_EXTREME_SANITY_CHECKS - if ( ( H5C_validate_protected_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_pinned_entry_list(cache_ptr) < 0 ) || - ( H5C_validate_lru_list(cache_ptr) < 0 ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "an extreme sanity check failed on exit.\n"); - } -#endif /* H5C_DO_EXTREME_SANITY_CHECKS */ - - FUNC_LEAVE_NOAPI(ret_value) - -} /* H5C_mark_entries_as_clean() */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5C_mark_entry_dirty * * Purpose: Mark a pinned or protected entry as dirty. The target entry @@ -3278,6 +2387,7 @@ H5C_mark_entry_dirty(void *thing) /* mark the entry as dirty if it isn't already */ entry_ptr->is_dirty = TRUE; + entry_ptr->image_up_to_date = FALSE; if ( was_pinned_unprotected_and_clean ) { @@ -3427,36 +2537,32 @@ H5C_move_entry(H5C_t * cache_ptr, was_dirty = entry_ptr->is_dirty; #endif /* H5C_MAINTAIN_CLEAN_AND_DIRTY_LRU_LISTS */ - if ( ! ( entry_ptr->flush_in_progress ) ) { - - entry_ptr->is_dirty = TRUE; - } + entry_ptr->is_dirty = TRUE; + /* This shouldn't be needed, but it keeps the test code happy */ + entry_ptr->image_up_to_date = FALSE; H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL) - if ( ! ( entry_ptr->flush_in_progress ) ) { - - H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL) + H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL) #if H5C_DO_SANITY_CHECKS - if ( removed_entry_from_slist ) { - - /* we just removed the entry from the slist. Thus we - * must touch up cache_ptr->slist_len_increase and - * cache_ptr->slist_size_increase to keep from skewing - * the sanity checks. - */ - HDassert( cache_ptr->slist_len_increase > 1 ); - HDassert( cache_ptr->slist_size_increase > - (int64_t)(entry_ptr->size) ); + if ( removed_entry_from_slist ) { - cache_ptr->slist_len_increase -= 1; - cache_ptr->slist_size_increase -= (int64_t)(entry_ptr->size); - } + /* we just removed the entry from the slist. Thus we + * must touch up cache_ptr->slist_len_increase and + * cache_ptr->slist_size_increase to keep from skewing + * the sanity checks. + */ + cache_ptr->slist_len_increase -= 1; + cache_ptr->slist_size_increase -= (int64_t)(entry_ptr->size); + } #endif /* H5C_DO_SANITY_CHECKS */ + if ( ! ( entry_ptr->flush_in_progress ) ) { + + /* skip the update if a flush is in progress */ H5C__UPDATE_RP_FOR_MOVE(cache_ptr, entry_ptr, was_dirty, FAIL) } } @@ -3536,6 +2642,11 @@ H5C_resize_entry(void *thing, size_t new_size) /* mark the entry as dirty if it isn't already */ entry_ptr->is_dirty = TRUE; + entry_ptr->image_up_to_date = FALSE; + + /* Release the current image */ + if( entry_ptr->image_ptr ) + entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr); /* do a flash cache size increase if appropriate */ if ( cache_ptr->flash_size_increase_possible ) { @@ -3743,18 +2854,6 @@ done: * or flushed -- nor may it be accessed by another call to * H5C_protect. Any attempt to do so will result in a failure. * - * The primary_dxpl_id and secondary_dxpl_id parameters - * specify the dxpl_ids used on the first write occasioned - * by the insertion (primary_dxpl_id), and on all subsequent - * writes (secondary_dxpl_id). This is useful in the - * metadata cache, but may not be needed elsewhere. If so, - * just use the same dxpl_id for both parameters. - * - * All reads are performed with the primary_dxpl_id. - * - * Similarly, the primary_dxpl_id is passed to the - * check_write_permitted function if it is called. - * * Return: Success: Ptr to the desired entry * Failure: NULL * @@ -3776,12 +2875,21 @@ done: * entries long before we actually have to evict something * to make space. * + * JRM -- 9/1/14 + * Replace the old rw parameter with the flags parameter. + * This allows H5C_protect to accept flags other than + * H5C__READ_ONLY_FLAG. + * + * Added support for the H5C__FLUSH_LAST_FLAG and + * H5C__FLUSH_COLLECTIVELY_FLAG flags. At present, these + * flags are only applied if the entry is not in cache, and + * is loaded into the cache as a result of this call. + * *------------------------------------------------------------------------- */ void * H5C_protect(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, + hid_t dxpl_id, const H5C_class_t * type, haddr_t addr, void * udata, @@ -3789,16 +2897,17 @@ H5C_protect(H5F_t * f, { H5C_t * cache_ptr; hbool_t hit; - hbool_t first_flush; hbool_t have_write_permitted = FALSE; hbool_t read_only = FALSE; + hbool_t flush_last; +#ifdef H5_HAVE_PARALLEL + hbool_t flush_collectively; +#endif /* H5_HAVE_PARALLEL */ hbool_t write_permitted; herr_t result; size_t empty_space; void * thing; H5C_cache_entry_t * entry_ptr; - haddr_t tag = HADDR_UNDEF; - H5P_genplist_t *dxpl; /* dataset transfer property list */ void * ret_value; /* Return value */ FUNC_ENTER_NOAPI(NULL) @@ -3812,8 +2921,6 @@ H5C_protect(H5F_t * f, HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); HDassert( type ); - HDassert( type->flush ); - HDassert( type->load ); HDassert( H5F_addr_defined(addr) ); #if H5C_DO_EXTREME_SANITY_CHECKS @@ -3827,6 +2934,10 @@ H5C_protect(H5F_t * f, #endif /* H5C_DO_EXTREME_SANITY_CHECKS */ read_only = ( (flags & H5C__READ_ONLY_FLAG) != 0 ); + flush_last = ( (flags & H5C__FLUSH_LAST_FLAG) != 0 ); +#ifdef H5_HAVE_PARALLEL + flush_collectively = ( (flags & H5C__FLUSH_COLLECTIVELY_FLAG) != 0 ); +#endif /* H5_HAVE_PARALLEL */ /* first check to see if the target is in cache */ H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, NULL) @@ -3838,6 +2949,10 @@ H5C_protect(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_BADTYPE, NULL, "incorrect cache entry type") #if H5C_DO_TAGGING_SANITY_CHECKS +{ + H5P_genplist_t *dxpl; /* dataset transfer property list */ + haddr_t tag = HADDR_UNDEF; + /* The entry is already in the cache, but make sure that the tag value being passed in via dxpl is still legal. This will ensure that had the entry NOT been in the cache, tagging was still set up correctly @@ -3845,7 +2960,7 @@ H5C_protect(H5F_t * f, from disk. */ /* Get the dataset transfer property list */ - if(NULL == (dxpl = (H5P_genplist_t *)H5I_object_verify(primary_dxpl_id, H5I_GENPROP_LST))) + if(NULL == (dxpl = (H5P_genplist_t *)H5I_object_verify(dxpl_id, H5I_GENPROP_LST))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a property list"); /* Get the tag from the DXPL */ @@ -3860,6 +2975,7 @@ H5C_protect(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "tag verification failed"); } /* end if */ +} #endif hit = TRUE; @@ -3871,7 +2987,7 @@ H5C_protect(H5F_t * f, hit = FALSE; - thing = H5C_load_entry(f, primary_dxpl_id, type, addr, udata); + thing = H5C_load_entry(f, dxpl_id, type, addr, udata); if ( thing == NULL ) { @@ -3881,7 +2997,7 @@ H5C_protect(H5F_t * f, entry_ptr = (H5C_cache_entry_t *)thing; /* Apply tag to newly protected entry */ - if(H5C_tag_entry(cache_ptr, entry_ptr, primary_dxpl_id) < 0) + if(H5C_tag_entry(cache_ptr, entry_ptr, dxpl_id) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTTAG, NULL, "Cannot tag metadata entry") /* If the entry is very large, and we are configured to allow it, @@ -3936,7 +3052,6 @@ H5C_protect(H5F_t * f, have_write_permitted = TRUE; - first_flush = TRUE; } } else { @@ -3944,7 +3059,6 @@ H5C_protect(H5F_t * f, have_write_permitted = TRUE; - first_flush = TRUE; } HDassert( entry_ptr->size <= H5C_MAX_ENTRY_SIZE ); @@ -3985,10 +3099,10 @@ H5C_protect(H5F_t * f, * see no point in worrying about the fourth. */ - result = H5C_make_space_in_cache(f, primary_dxpl_id, - secondary_dxpl_id, - space_needed, write_permitted, - &first_flush); + result = H5C_make_space_in_cache(f, + dxpl_id, + space_needed, + write_permitted); if ( result < 0 ) { @@ -4002,7 +3116,24 @@ H5C_protect(H5F_t * f, * * This is no longer true -- due to a bug fix, we may modify * data on load to repair a file. + * + * ******************************************* + * + * Set the flush_last (and possibly flush_collectively) fields + * of the newly loaded entry before inserting it into the + * index. Must do this, as the index tracked the number of + * entries with the flush_last field set, but assumes that + * the field will not change after insertion into the index. + * + * Note that this means that the H5C__FLUSH_LAST_FLAG and + * H5C__FLUSH_COLLECTIVELY_FLAG flags are ignored if the + * entry is already in cache. */ + entry_ptr->flush_me_last = flush_last; +#ifdef H5_HAVE_PARALLEL + entry_ptr->flush_me_collectively = flush_collectively; +#endif + H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, NULL) if ( ( entry_ptr->is_dirty ) && ( ! (entry_ptr->in_slist) ) ) { @@ -4017,11 +3148,11 @@ H5C_protect(H5F_t * f, */ H5C__UPDATE_RP_FOR_INSERTION(cache_ptr, entry_ptr, NULL) - /* If the entry's type has a 'notify' callback send a 'after insertion' + /* If the entry's type has a 'notify' callback send a 'after load' * notice now that the entry is fully integrated into the cache. */ if(entry_ptr->type->notify && - (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_AFTER_INSERT, entry_ptr) < 0) + (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_AFTER_LOAD, entry_ptr) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, NULL, "can't notify client about entry inserted into cache") } @@ -4083,7 +3214,6 @@ H5C_protect(H5F_t * f, have_write_permitted = TRUE; - first_flush = TRUE; } } else { @@ -4091,7 +3221,6 @@ H5C_protect(H5F_t * f, have_write_permitted = TRUE; - first_flush = TRUE; } } @@ -4100,10 +3229,8 @@ H5C_protect(H5F_t * f, (cache_ptr->resize_ctl).epoch_length ) ) { result = H5C__auto_adjust_cache_size(f, - primary_dxpl_id, - secondary_dxpl_id, - write_permitted, - &first_flush); + dxpl_id, + write_permitted); if ( result != SUCCEED ) { HGOTO_ERROR(H5E_CACHE, H5E_CANTPROTECT, NULL, \ @@ -4137,10 +3264,10 @@ H5C_protect(H5F_t * f, if(cache_ptr->index_size > cache_ptr->max_cache_size) cache_ptr->cache_full = TRUE; - result = H5C_make_space_in_cache(f, primary_dxpl_id, - secondary_dxpl_id, - (size_t)0, write_permitted, - &first_flush); + result = H5C_make_space_in_cache(f, + dxpl_id, + (size_t)0, + write_permitted); if ( result < 0 ) { @@ -4636,6 +3763,10 @@ done: * total_entries_skipped_in_msic, total_entries_scanned_in_msic, * and max_entries_skipped_in_msic fields. * + * JRM -- 4/11/15 + * Added code displaying the new slist_scan_restarts, + * LRU_scan_restarts, and hash_bucket_scan_restarts fields; + * *------------------------------------------------------------------------- */ herr_t @@ -4659,6 +3790,7 @@ H5C_stats(H5C_t * cache_ptr, int64_t total_clears = 0; int64_t total_flushes = 0; int64_t total_evictions = 0; + int64_t total_take_ownerships = 0; int64_t total_moves = 0; int64_t total_entry_flush_moves = 0; int64_t total_cache_flush_moves = 0; @@ -4687,6 +3819,8 @@ H5C_stats(H5C_t * cache_ptr, FUNC_ENTER_NOAPI(FAIL) + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + /* This would normally be an assert, but we need to use an HGOTO_ERROR * call to shut up the compiler. */ @@ -4713,6 +3847,7 @@ H5C_stats(H5C_t * cache_ptr, total_clears += cache_ptr->clears[i]; total_flushes += cache_ptr->flushes[i]; total_evictions += cache_ptr->evictions[i]; + total_take_ownerships += cache_ptr->take_ownerships[i]; total_moves += cache_ptr->moves[i]; total_entry_flush_moves += cache_ptr->entry_flush_moves[i]; total_cache_flush_moves += cache_ptr->cache_flush_moves[i]; @@ -4864,11 +3999,16 @@ H5C_stats(H5C_t * cache_ptr, (long)max_read_protects); HDfprintf(stdout, - "%s Total clears / flushes / evictions = %ld / %ld / %ld\n", + "%s Total clears / flushes = %ld / %ld\n", cache_ptr->prefix, (long)total_clears, - (long)total_flushes, - (long)total_evictions); + (long)total_flushes); + + HDfprintf(stdout, + "%s Total evictions / take ownerships = %ld / %ld\n", + cache_ptr->prefix, + (long)total_evictions, + (long)total_take_ownerships); HDfprintf(stdout, "%s Total insertions(pinned) / moves = %ld(%ld) / %ld\n", @@ -4940,6 +4080,13 @@ H5C_stats(H5C_t * cache_ptr, (long long)(cache_ptr->total_entries_scanned_in_msic - cache_ptr->entries_scanned_to_make_space)); + HDfprintf(stdout, + "%s slist/LRU/hash bkt scan restarts = %lld / %lld / %lld.\n", + cache_ptr->prefix, + (long long)(cache_ptr->slist_scan_restarts), + (long long)(cache_ptr->LRU_scan_restarts), + (long long)(cache_ptr->hash_bucket_scan_restarts)); + #if H5C_COLLECT_CACHE_ENTRY_STATS HDfprintf(stdout, "%s aggregate max / min accesses = %d / %d\n", @@ -4993,11 +4140,16 @@ H5C_stats(H5C_t * cache_ptr, (int)(cache_ptr->max_read_protects[i])); HDfprintf(stdout, - "%s clears / flushes / evictions = %ld / %ld / %ld\n", + "%s clears / flushes = %ld / %ld\n", cache_ptr->prefix, (long)(cache_ptr->clears[i]), - (long)(cache_ptr->flushes[i]), - (long)(cache_ptr->evictions[i])); + (long)(cache_ptr->flushes[i])); + + HDfprintf(stdout, + "%s evictions / take ownerships = %ld / %ld\n", + cache_ptr->prefix, + (long)(cache_ptr->evictions[i]), + (long)(cache_ptr->take_ownerships[i])); HDfprintf(stdout, "%s insertions(pinned) / moves = %ld(%ld) / %ld\n", @@ -5092,6 +4244,11 @@ done: * total_entries_skipped_in_msic, total_entries_scanned_in_msic, * and max_entries_skipped_in_msic fields. * + * JRM 4/11/15 + * Added code to initialize the new slist_scan_restarts, + * LRU_scan_restarts, hash_bucket_scan_restarts, and + * take_ownerships fields. + * *------------------------------------------------------------------------- */ void @@ -5125,6 +4282,7 @@ H5C_stats__reset(H5C_t H5_ATTR_UNUSED * cache_ptr) cache_ptr->clears[i] = 0; cache_ptr->flushes[i] = 0; cache_ptr->evictions[i] = 0; + cache_ptr->take_ownerships[i] = 0; cache_ptr->moves[i] = 0; cache_ptr->entry_flush_moves[i] = 0; cache_ptr->cache_flush_moves[i] = 0; @@ -5167,6 +4325,10 @@ H5C_stats__reset(H5C_t H5_ATTR_UNUSED * cache_ptr) cache_ptr->max_entries_scanned_in_msic = 0; cache_ptr->entries_scanned_to_make_space = 0; + cache_ptr->slist_scan_restarts = 0; + cache_ptr->LRU_scan_restarts = 0; + cache_ptr->hash_bucket_scan_restarts = 0; + #if H5C_COLLECT_CACHE_ENTRY_STATS for ( i = 0; i <= cache_ptr->max_type_id; i++ ) @@ -5319,6 +4481,107 @@ done: /*------------------------------------------------------------------------- + * Function: H5C_dump_cache_skip_list + * + * Purpose: Debugging routine that prints a summary of the contents of + * the skip list used by the metadata cache metadata cache to + * maintain an address sorted list of dirty entries. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: John Mainzer + * 11/15/14 + * + *------------------------------------------------------------------------- + */ +#if 0 /* debugging routine */ +herr_t +H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int i; + H5C_cache_entry_t * entry_ptr = NULL; + H5SL_node_t * node_ptr = NULL; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(calling_fcn != NULL); + + HDfprintf(stdout, "\n\nDumping metadata cache skip list from %s.\n", + calling_fcn); + HDfprintf(stdout, " slist len = %d.\n", cache_ptr->slist_len); + HDfprintf(stdout, " slist size = %lld.\n", + (long long)(cache_ptr->slist_size)); + + if ( cache_ptr->slist_len > 0 ) + { + /* If we get this far, all entries in the cache are listed in the + * skip list -- scan the skip list generating the desired output. + */ + + HDfprintf(stdout, + "Num: Addr: Len: Prot/Pind: Dirty: Type:\n"); + + i = 0; + + node_ptr = H5SL_first(cache_ptr->slist_ptr); + + if ( node_ptr != NULL ) { + + entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + + } else { + + entry_ptr = NULL; + } + + while ( entry_ptr != NULL ) { + + HDassert( entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); + + HDfprintf(stdout, + "%s%d 0x%016llx %4lld %d/%d %d %s\n", + cache_ptr->prefix, i, + (long long)(entry_ptr->addr), + (long long)(entry_ptr->size), + (int)(entry_ptr->is_protected), + (int)(entry_ptr->is_pinned), + (int)(entry_ptr->is_dirty), + entry_ptr->type->name); + + HDfprintf(stdout, " node_ptr = 0x%llx, item = 0x%llx\n", + (unsigned long long)node_ptr, + (unsigned long long)H5SL_item(node_ptr)); + + /* increment node_ptr before we delete its target */ + node_ptr = H5SL_next(node_ptr); + + if ( node_ptr != NULL ) { + + entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + + } else { + + entry_ptr = NULL; + } + + i++; + } + } + + HDfprintf(stdout, "\n\n"); + +done: + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_dump_cache_skip_list() */ +#endif /* debugging routine */ + + +/*------------------------------------------------------------------------- * Function: H5C_unpin_entry_from_client() * * Purpose: Internal routine to unpin a cache entry from a client action. @@ -5447,18 +4710,6 @@ done: * argument must be the value returned by that call to * H5C_protect(). * - * The primary_dxpl_id and secondary_dxpl_id parameters - * specify the dxpl_ids used on the first write occasioned - * by the unprotect (primary_dxpl_id), and on all subsequent - * writes (secondary_dxpl_id). Since an uprotect cannot - * occasion a write at present, all this is moot for now. - * However, things change, and in any case, - * H5C_flush_single_entry() needs primary_dxpl_id and - * secondary_dxpl_id in its parameter list. - * - * The function can't cause a read either, so the dxpl_id - * parameters are moot in this case as well. - * * Return: Non-negative on success/Negative on failure * * If the deleted flag is TRUE, simply remove the target entry @@ -5474,9 +4725,7 @@ done: */ herr_t H5C_unprotect(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - const H5C_class_t * type, + hid_t dxpl_id, haddr_t addr, void * thing, unsigned int flags) @@ -5514,9 +4763,6 @@ H5C_unprotect(H5F_t * f, HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HDassert( type ); - HDassert( type->clear ); - HDassert( type->flush ); HDassert( H5F_addr_defined(addr) ); HDassert( thing ); HDassert( ! ( pin_entry && unpin_entry ) ); @@ -5527,7 +4773,6 @@ H5C_unprotect(H5F_t * f, entry_ptr = (H5C_cache_entry_t *)thing; HDassert( entry_ptr->addr == addr ); - HDassert( entry_ptr->type == type ); /* also set the dirtied variable if the dirtied field is set in * the entry. @@ -5615,6 +4860,19 @@ H5C_unprotect(H5F_t * f, /* Mark the entry as dirty if appropriate */ entry_ptr->is_dirty = (entry_ptr->is_dirty || dirtied); + /* the image_up_to_date field was introduced to support + * journaling. Until we re-introduce journaling, this + * field should be equal to !entry_ptr->is_dirty. + * + * When journaling is re-enabled it should be set + * to FALSE if dirtied is TRUE. + */ +#if 1 /* JRM */ + entry_ptr->image_up_to_date = FALSE; +#else /* JRM */ + entry_ptr->image_up_to_date = !entry_ptr->is_dirty; +#endif /* JRM */ + /* Update index for newly dirtied entry */ if(was_clean && entry_ptr->is_dirty) H5C__UPDATE_INDEX_FOR_ENTRY_DIRTY(cache_ptr, entry_ptr) @@ -5656,13 +4914,6 @@ H5C_unprotect(H5F_t * f, * JRM - 5/19/04 */ if ( deleted ) { - - /* the following first flush flag will never be used as we are - * calling H5C_flush_single_entry with both the - * H5C__FLUSH_CLEAR_ONLY_FLAG and H5C__FLUSH_INVALIDATE_FLAG flags. - * However, it is needed for the function call. - */ - hbool_t dummy_first_flush = TRUE; unsigned flush_flags = (H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__FLUSH_INVALIDATE_FLAG); @@ -5670,76 +4921,51 @@ H5C_unprotect(H5F_t * f, HDassert ( ! (entry_ptr->is_pinned ) ); /* verify that the target entry is in the cache. */ - H5C__SEARCH_INDEX(cache_ptr, addr, test_entry_ptr, FAIL) + if(test_entry_ptr == NULL) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "entry not in hash table?!?.") + else if(test_entry_ptr != entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "hash table contains multiple entries for addr?!?.") - if ( test_entry_ptr == NULL ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "entry not in hash table?!?.") - } - else if ( test_entry_ptr != entry_ptr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "hash table contains multiple entries for addr?!?.") - } - - /* Pass along 'free file space' flag to cache client */ - - entry_ptr->free_file_space_on_destroy = free_file_space; + /* Set the 'free file space' flag for the flush, if needed */ + if(free_file_space) + flush_flags |= H5C__FREE_FILE_SPACE_FLAG; /* Set the "take ownership" flag for the flush, if needed */ if(take_ownership) flush_flags |= H5C__TAKE_OWNERSHIP_FLAG; - if ( H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - type, - addr, - flush_flags, - &dummy_first_flush, - TRUE) < 0 ) { + /* Delete the entry from the skip list on destroy */ + flush_flags |= H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG; + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, flush_flags, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush entry") - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't flush.") +#if H5C_DO_SANITY_CHECKS + if ( ( take_ownership ) && ( ! was_clean ) ) + { + /* we have just removed an entry from the skip list. Thus + * we must touch up cache_ptr->slist_len_increase and + * cache_ptr->slist_size_increase to keep from skewing + * the sanity checks on flushes. + */ + cache_ptr->slist_len_increase -= 1; + cache_ptr->slist_size_increase -= (int64_t)(entry_ptr->size); } +#endif /* H5C_DO_SANITY_CHECKS */ } #ifdef H5_HAVE_PARALLEL else if ( clear_entry ) { - /* the following first flush flag will never be used as we are - * calling H5C_flush_single_entry with the - * H5C__FLUSH_CLEAR_ONLY_FLAG flag. However, it is needed for - * the function call. - */ - hbool_t dummy_first_flush = TRUE; - /* verify that the target entry is in the cache. */ - H5C__SEARCH_INDEX(cache_ptr, addr, test_entry_ptr, FAIL) + if(test_entry_ptr == NULL) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "entry not in hash table?!?.") + else if(test_entry_ptr != entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "hash table contains multiple entries for addr?!?.") - if ( test_entry_ptr == NULL ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "entry not in hash table?!?.") - } - else if ( test_entry_ptr != entry_ptr ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, \ - "hash table contains multiple entries for addr?!?.") - } - - if ( H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - type, - addr, - H5C__FLUSH_CLEAR_ONLY_FLAG, - &dummy_first_flush, - TRUE) < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear.") - } + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_CLEAR_ONLY_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTUNPROTECT, FAIL, "Can't clear entry") } #endif /* H5_HAVE_PARALLEL */ } @@ -6392,10 +5618,8 @@ done: */ static herr_t H5C__auto_adjust_cache_size(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr) + hid_t dxpl_id, + hbool_t write_permitted) { H5C_t * cache_ptr = f->shared->cache; herr_t result; @@ -6596,13 +5820,11 @@ H5C__auto_adjust_cache_size(H5F_t * f, } else { result = H5C__autoadjust__ageout(f, + dxpl_id, hit_rate, &status, &new_max_cache_size, - primary_dxpl_id, - secondary_dxpl_id, - write_permitted, - first_flush_ptr); + write_permitted); if ( result != SUCCEED ) { @@ -6745,13 +5967,11 @@ done: */ static herr_t H5C__autoadjust__ageout(H5F_t * f, + hid_t dxpl_id, double hit_rate, enum H5C_resize_status * status_ptr, size_t * new_max_cache_size_ptr, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr) + hbool_t write_permitted) { H5C_t * cache_ptr = f->shared->cache; herr_t result; @@ -6792,8 +6012,7 @@ H5C__autoadjust__ageout(H5F_t * f, if ( cache_ptr->max_cache_size > (cache_ptr->resize_ctl).min_size ){ /* evict aged out cache entries if appropriate... */ - if(H5C__autoadjust__ageout__evict_aged_out_entries(f, primary_dxpl_id, - secondary_dxpl_id, write_permitted, first_flush_ptr) < 0) + if(H5C__autoadjust__ageout__evict_aged_out_entries(f, dxpl_id, write_permitted) < 0) HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "error flushing aged out entries.") /* ... and then reduce cache size if appropriate */ @@ -6990,20 +6209,31 @@ done: * * Programmer: John Mainzer, 11/22/04 * + * Changes: Modified function to detect deletions of entries + * during a scan of the LRU, and where appropriate, + * restart the scan to avoid proceeding with a next + * entry that is no longer in the cache. + * + * Note the absence of checks after flushes of clean + * entries. As a second entry can only be removed by + * by a call to the pre_serialize or serialize callback + * of the first, and as these callbacks will not be called + * on clean entries, no checks are needed. + * + * JRM -- 4/6/15 + * *------------------------------------------------------------------------- */ static herr_t H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - hbool_t write_permitted, - hbool_t * first_flush_ptr) + hid_t dxpl_id, + hbool_t write_permitted) { H5C_t * cache_ptr = f->shared->cache; - herr_t result; size_t eviction_size_limit; size_t bytes_evicted = 0; hbool_t prev_is_dirty = FALSE; + hbool_t restart_scan; H5C_cache_entry_t * entry_ptr; H5C_cache_entry_t * next_ptr; H5C_cache_entry_t * prev_ptr; @@ -7032,13 +6262,17 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, if ( write_permitted ) { + restart_scan = FALSE; entry_ptr = cache_ptr->LRU_tail_ptr; while ( ( entry_ptr != NULL ) && ( (entry_ptr->type)->id != H5C__EPOCH_MARKER_TYPE ) && ( bytes_evicted < eviction_size_limit ) ) { + HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); HDassert( ! (entry_ptr->is_protected) ); + HDassert( ! (entry_ptr->is_read_only) ); + HDassert( (entry_ptr->ro_ref_count) == 0 ); next_ptr = entry_ptr->next; prev_ptr = entry_ptr->prev; @@ -7050,59 +6284,53 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, if ( entry_ptr->is_dirty ) { - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__NO_FLAGS_SET, - first_flush_ptr, - FALSE); - } else { + /* reset entries_removed_counter and + * last_entry_removed_ptr prior to the call to + * H5C__flush_single_entry() so that we can spot + * unexpected removals of entries from the cache, + * and set the restart_scan flag if proceeding + * would be likely to cause us to scan an entry + * that is no longer in the cache. + */ + cache_ptr->entries_removed_counter = 0; + cache_ptr->last_entry_removed_ptr = NULL; - bytes_evicted += entry_ptr->size; + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__FLUSH_INVALIDATE_FLAG, - first_flush_ptr, - TRUE); - } + if ( ( cache_ptr->entries_removed_counter > 1 ) || + ( cache_ptr->last_entry_removed_ptr == prev_ptr ) ) - if ( result < 0 ) { + restart_scan = TRUE; - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "unable to flush entry") + } else { + + bytes_evicted += entry_ptr->size; + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") } if ( prev_ptr != NULL ) { -#ifndef NDEBUG - if ( prev_ptr->magic != H5C__H5C_CACHE_ENTRY_T_MAGIC ) { - /* something horrible has happened to *prev_ptr -- - * scream and die. - */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "*prev_ptr corrupt") - - } else -#endif /* NDEBUG */ - if ( ( prev_ptr->is_dirty != prev_is_dirty ) - || - ( prev_ptr->next != next_ptr ) - || - ( prev_ptr->is_protected ) - || - ( prev_ptr->is_pinned ) ) { + if ( ( restart_scan ) + || + ( prev_ptr->is_dirty != prev_is_dirty ) + || + ( prev_ptr->next != next_ptr ) + || + ( prev_ptr->is_protected ) + || + ( prev_ptr->is_pinned ) ) { /* something has happened to the LRU -- start over * from the tail. */ + restart_scan = FALSE; entry_ptr = cache_ptr->LRU_tail_ptr; + H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr) + } else { entry_ptr = prev_ptr; @@ -7160,23 +6388,15 @@ H5C__autoadjust__ageout__evict_aged_out_entries(H5F_t * f, if ( ! (entry_ptr->is_dirty) ) { - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__FLUSH_INVALIDATE_FLAG, - first_flush_ptr, - TRUE); - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "unable to flush clean entry") - } + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush clean entry") } /* just skip the entry if it is dirty, as we can't do * anything with it now since we can't write. + * + * Since all entries are clean, serialize() will not be called, + * and thus we needn't test to see if the LRU has been changed + * out from under us. */ entry_ptr = prev_ptr; @@ -7640,17 +6860,44 @@ done: * Programmer: John Mainzer * 3/24/065 * + * Changes: Modified function to test for slist chamges in + * pre_serialize and serialize callbacks, and re-start + * scans through the slist when such changes occur. + * + * This has been a potential problem for some time, + * and there has been code in this function to deal + * with elements of this issue. However the shift + * to the V3 cache in combination with the activities + * of some of the cache clients (in particular the + * free space manager and the fractal heap) have + * made this re-work necessary in H5C_flush_cache. + * + * At present, this issue doesn't seem to be causing problems + * in H5C_flush_invalidate_cache(). However, it seems + * prudent to port the H5C_flush_cache changes to this + * function as well. + * + * JRM -- 12/14/14 + * + * Added code to track entry size change during flush single + * entry. This didn't used to be a problem, as the entry + * was largely removed from the cache data structures before + * the flush proper. However, re-entrant calls to the cache + * in the parallel case required a re-factoring of the + * H5C__flush_single_entry() function to keep entries fully + * in the cache until after the pre-serialize and serialize + * calls. + * JRM -- 12/25/14 + * *------------------------------------------------------------------------- */ static herr_t -H5C_flush_invalidate_cache(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, +H5C_flush_invalidate_cache(const H5F_t * f, + hid_t dxpl_id, unsigned flags) { H5C_t * cache_ptr = f->shared->cache; - herr_t status; - hbool_t first_flush = TRUE; + hbool_t restart_slist_scan; int32_t protected_entries = 0; int32_t i; int32_t cur_pel_len; @@ -7661,10 +6908,14 @@ H5C_flush_invalidate_cache(H5F_t * f, H5C_cache_entry_t * entry_ptr = NULL; H5C_cache_entry_t * next_entry_ptr = NULL; #if H5C_DO_SANITY_CHECKS - int64_t actual_slist_len = 0; + int64_t flushed_slist_len = 0; int64_t initial_slist_len = 0; - size_t actual_slist_size = 0; + int64_t flushed_slist_size = 0; size_t initial_slist_size = 0; + int64_t entry_size_change; + int64_t * entry_size_change_ptr = &entry_size_change; +#else /* H5C_DO_SANITY_CHECKS */ + int64_t * entry_size_change_ptr = NULL; #endif /* H5C_DO_SANITY_CHECKS */ herr_t ret_value = SUCCEED; @@ -7681,16 +6932,9 @@ H5C_flush_invalidate_cache(H5F_t * f, cooked_flags = flags & H5C__FLUSH_CLEAR_ONLY_FLAG; /* remove ageout markers if present */ - if ( cache_ptr->epoch_markers_active > 0 ) { - - status = H5C__autoadjust__ageout__remove_all_markers(cache_ptr); - - if ( status != SUCCEED ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "error removing all epoch markers.") - } - } + if(cache_ptr->epoch_markers_active > 0) + if(H5C__autoadjust__ageout__remove_all_markers(cache_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "error removing all epoch markers.") /* The flush proceedure here is a bit strange. * @@ -7743,28 +6987,9 @@ H5C_flush_invalidate_cache(H5F_t * f, * may be created by the flush call backs. Thus it is possible * that the slist will not be empty after we finish the scan. */ - if ( cache_ptr->slist_len == 0 ) { - - node_ptr = NULL; - HDassert( cache_ptr->slist_size == 0 ); - - } else { - - /* Start at beginning of skip list each time */ - node_ptr = H5SL_first(cache_ptr->slist_ptr); - HDassert( node_ptr != NULL ); - - /* Get cache entry for this node */ - next_entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); - if ( NULL == next_entry_ptr ) - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "next_entry_ptr == NULL ?!?!") - HDassert( next_entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); - HDassert( next_entry_ptr->is_dirty ); - HDassert( next_entry_ptr->in_slist ); - } #if H5C_DO_SANITY_CHECKS - /* Depending on circumstances, H5C_flush_single_entry() will + /* Depending on circumstances, H5C__flush_single_entry() will * remove dirty entries from the slist as it flushes them. * Thus for sanity checks we must make note of the initial * slist length and size before we do any flushes. @@ -7785,63 +7010,80 @@ H5C_flush_invalidate_cache(H5F_t * f, cache_ptr->slist_len_increase = 0; cache_ptr->slist_size_increase = 0; - /* Finally, reset the actual_slist_len and actual_slist_size + /* Finally, reset the flushed_slist_len and flushed_slist_size * fields to zero, as these fields are used to accumulate * the slist lenght and size that we see as we scan through * the slist. */ - actual_slist_len = 0; - actual_slist_size = 0; + flushed_slist_len = 0; + flushed_slist_size = 0; #endif /* H5C_DO_SANITY_CHECKS */ - while ( node_ptr != NULL ) + /* set the cache_ptr->slist_change_in_pre_serialize and + * cache_ptr->slist_change_in_serialize to false. + * + * These flags are set to TRUE by H5C__flush_single_entry if the + * slist is modified by a pre_serialize or serialize call + * respectively. + * + * H5C_flush_invalidate_cache() uses these flags to detect any + * modifications to the slist that might corrupt the scan of + * the slist -- and restart the scan in this event. + */ + cache_ptr->slist_change_in_pre_serialize = FALSE; + cache_ptr->slist_change_in_serialize = FALSE; + + /* this done, start the scan of the slist */ + + restart_slist_scan = TRUE; + + while ( ( restart_slist_scan ) || ( node_ptr != NULL ) ) { - entry_ptr = next_entry_ptr; + if ( restart_slist_scan ) + { + restart_slist_scan = FALSE; - /* With the advent of the fractal heap, it is possible - * that the flush callback will dirty and/or resize - * other entries in the cache. In particular, while - * Quincey has promised me that this will never happen, - * it is possible that the flush callback for an - * entry may protect an entry that is not in the cache, - * perhaps causing the cache to flush and possibly - * evict the entry associated with node_ptr to make - * space for the new entry. - * - * Thus we do a bit of extra sanity checking on entry_ptr, - * and break out of this scan of the skip list if we - * detect major problems. We have a bit of leaway on the - * number of passes though the skip list, so this shouldn't - * be an issue in the flush in and of itself, as it should - * be all but impossible for this to happen more than once - * in any flush. - * - * Observe that that breaking out of the scan early - * shouldn't break the sanity checks just after the end - * of this while loop. - * - * If an entry has merely been marked clean and removed from - * the s-list, we simply break out of the scan. - * - * If the entry has been evicted, we flag an error and - * exit. - */ -#ifndef NDEBUG - if ( entry_ptr->magic != H5C__H5C_CACHE_ENTRY_T_MAGIC ) { + /* Start at beginning of skip list */ + node_ptr = H5SL_first(cache_ptr->slist_ptr); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry_ptr->magic is invalid ?!?!") + if ( node_ptr == NULL ) + { + /* the slist is empty -- break out of inner loop */ + break; + } + HDassert( node_ptr != NULL ); - } else -#endif /* NDEBUG */ - if ( ( ! entry_ptr->is_dirty ) || - ( ! entry_ptr->in_slist ) ) { + /* Get cache entry for this node */ + next_entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); - /* the s-list has been modified out from under us. - * break out of the loop. - */ - goto end_of_inner_loop;; + if(NULL == next_entry_ptr) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ + "next_entry_ptr == NULL ?!?!") + + HDassert( next_entry_ptr->magic == \ + H5C__H5C_CACHE_ENTRY_T_MAGIC ); + HDassert( next_entry_ptr->is_dirty ); + HDassert( next_entry_ptr->in_slist ); } + entry_ptr = next_entry_ptr; + + /* It is possible that entries will be dirtied, resized, + * flushed, or removed from the cache via the take ownership + * flag as the result of pre_serialize or serialized callbacks. + * + * This in turn can corrupt the scan through the slist. + * + * We test for slist modifications in the pre_serialize + * and serialize callbacks, and restart the scan of the + * slist if we find them. However, best we do some extra + * sanity checking just in case. + */ + + HDassert( entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); + HDassert( entry_ptr->in_slist ); + HDassert( entry_ptr->is_dirty ); + /* increment node pointer now, before we delete its target * from the slist. */ @@ -7854,6 +7096,7 @@ H5C_flush_invalidate_cache(H5F_t * f, HDassert( next_entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); HDassert( next_entry_ptr->is_dirty ); HDassert( next_entry_ptr->in_slist ); + HDassert( entry_ptr != next_entry_ptr ); } else { next_entry_ptr = NULL; } @@ -7877,21 +7120,6 @@ H5C_flush_invalidate_cache(H5F_t * f, ( cache_ptr->num_last_entries >= cache_ptr->slist_len ) ) ) { -#if H5C_DO_SANITY_CHECKS - /* update actual_slist_len & actual_slist_size before - * the flush. Note that the entry will be removed - * from the slist after the flush, and thus may be - * resized by the flush callback. This is OK, as - * we will catch the size delta in - * cache_ptr->slist_size_increase. - * - * Note that we include pinned entries in this count, even - * though we will not actually flush them. - */ - actual_slist_len++; - actual_slist_size += entry_ptr->size; -#endif /* H5C_DO_SANITY_CHECKS */ - if ( entry_ptr->is_protected ) { /* we have major problems -- but lets flush @@ -7903,28 +7131,52 @@ H5C_flush_invalidate_cache(H5F_t * f, /* Test to see if we are can flush the entry now. * If we can, go ahead and flush, but don't tell - * H5C_flush_single_entry() to destroy the entry + * H5C__flush_single_entry() to destroy the entry * as pinned entries can't be evicted. */ if(entry_ptr->flush_dep_height == curr_flush_dep_height ) { - status = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - NULL, - entry_ptr->addr, - H5C__NO_FLAGS_SET, - &first_flush, - FALSE); - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, we - * are toast so just scream and die. - */ +#if H5C_DO_SANITY_CHECKS + /* update flushed_slist_len & flushed_slist_size + * before the flush. Note that the entry will + * be removed from the slist after the flush, + * and thus may be resized by the flush callback. + * This is OK, as we will catch the size delta in + * cache_ptr->slist_size_increase. + * + */ + flushed_slist_len++; + flushed_slist_size += (int64_t)entry_ptr->size; + entry_size_change = 0; +#endif /* H5C_DO_SANITY_CHECKS */ + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, entry_size_change_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty pinned entry flush failed.") + +#if H5C_DO_SANITY_CHECKS + /* entry size may have changed during the flush. + * Update flushed_slist_size to account for this. + */ + flushed_slist_size += entry_size_change; +#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "dirty pinned entry flush failed.") - } /* end if */ flushed_during_dep_loop = TRUE; + + if ( ( cache_ptr->slist_change_in_serialize ) || + ( cache_ptr->slist_change_in_pre_serialize ) ) + { + /* The slist has been modified by something + * other than the simple removal of the + * of the flushed entry after the flush. + * + * This has the potential to corrupt the + * scan through the slist, so restart it. + */ + restart_slist_scan = TRUE; + cache_ptr->slist_change_in_pre_serialize + = FALSE; + cache_ptr->slist_change_in_serialize = FALSE; + H5C__UPDATE_STATS_FOR_SLIST_SCAN_RESTART(cache_ptr); + } } /* end if */ else if(entry_ptr->flush_dep_height < curr_flush_dep_height) /* This shouldn't happen -- if it does, just scream and die. */ @@ -7932,25 +7184,48 @@ H5C_flush_invalidate_cache(H5F_t * f, } /* end if */ else { if(entry_ptr->flush_dep_height == curr_flush_dep_height ){ +#if H5C_DO_SANITY_CHECKS + /* update flushed_slist_len & flushed_slist_size + * before the flush. Note that the entry will + * be removed from the slist after the flush, + * and thus may be resized by the flush callback. + * This is OK, as we will catch the size delta in + * cache_ptr->slist_size_increase. + * + */ + flushed_slist_len++; + flushed_slist_size += (int64_t)entry_ptr->size; + entry_size_change = 0; +#endif /* H5C_DO_SANITY_CHECKS */ - status = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - NULL, - entry_ptr->addr, - (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG), - &first_flush, - TRUE); - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, we - * are toast so just scream and die. - */ + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG), entry_size_change_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "dirty entry flush destroy failed.") + +#if H5C_DO_SANITY_CHECKS + /* entry size may have changed during the flush. + * Update flushed_slist_size to account for this. + */ + flushed_slist_size += entry_size_change; +#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "dirty entry flush destroy failed.") - } /* end if */ flushed_during_dep_loop = TRUE; + + if ((cache_ptr->slist_change_in_serialize) || + (cache_ptr->slist_change_in_pre_serialize)) + { + /* The slist has been modified by something + * other than the simple removal of the + * of the flushed entry after the flush. + * + * This has the potential to corrupt the + * scan through the slist, so restart it. + */ + restart_slist_scan = TRUE; + cache_ptr->slist_change_in_pre_serialize + = FALSE; + cache_ptr->slist_change_in_serialize = FALSE; + H5C__UPDATE_STATS_FOR_SLIST_SCAN_RESTART(cache_ptr) + } } /* end if */ else if(entry_ptr->flush_dep_height < curr_flush_dep_height) /* This shouldn't happen -- if it does, just scream and die. */ @@ -7971,11 +7246,10 @@ H5C_flush_invalidate_cache(H5F_t * f, if ( node_ptr == NULL ) { - HDassert( (actual_slist_len + cache_ptr->slist_len) == + HDassert( (flushed_slist_len + cache_ptr->slist_len) == (initial_slist_len + cache_ptr->slist_len_increase) ); - HDassert( (actual_slist_size + cache_ptr->slist_size) == - (initial_slist_size + - (size_t)(cache_ptr->slist_size_increase)) ); + HDassert( (flushed_slist_size + (int64_t)cache_ptr->slist_size) == + ((int64_t)initial_slist_size + cache_ptr->slist_size_increase) ); } #endif /* H5C_DO_SANITY_CHECKS */ @@ -7997,9 +7271,7 @@ H5C_flush_invalidate_cache(H5F_t * f, while ( next_entry_ptr != NULL ) { entry_ptr = next_entry_ptr; -#ifndef NDEBUG HDassert( entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); -#endif /* NDEBUG */ next_entry_ptr = entry_ptr->ht_next; HDassert ( ( next_entry_ptr == NULL ) || @@ -8028,23 +7300,46 @@ H5C_flush_invalidate_cache(H5F_t * f, * If we can, go ahead and flush. */ if(entry_ptr->flush_dep_height == curr_flush_dep_height ){ - status = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - NULL, - entry_ptr->addr, - (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG), - &first_flush, - TRUE); - if ( status < 0 ) { - - /* This shouldn't happen -- if it does, - * we are toast so just scream and die. - */ + /* if *entry_ptr is dirty, it is possible + * that one or more other entries may be + * either removed from the cache, loaded + * into the cache, or moved to a new location + * in the file as a side effect of the flush. + * + * If this happens, and one of the target + * entries happens to be the next entry in + * the hash bucket, we could find ourselves + * either find ourselves either scanning a + * non-existant entry, scanning through a + * different bucket, or skipping an entry. + * + * Neither of these are good, so restart the + * the scan at the head of the hash bucket + * after the flush if *entry_ptr was dirty, + * on the off chance that the next entry was + * a target. + * + * This is not as inefficient at it might seem, + * as hash buckets typically have at most two + * or three entries. + */ + hbool_t entry_was_dirty; + + entry_was_dirty = entry_ptr->is_dirty; - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "Entry flush destroy failed.") + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, (cooked_flags | H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG), NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Entry flush destroy failed.") + + if ( entry_was_dirty ) { + + /* update stats for hash bucket scan + * restart here. + * -- JRM + */ + next_entry_ptr = cache_ptr->index[i]; + H5C__UPDATE_STATS_FOR_HASH_BUCKET_SCAN_RESTART(cache_ptr) } + flushed_during_dep_loop = TRUE; } /* end if */ else if(entry_ptr->flush_dep_height < curr_flush_dep_height) @@ -8060,7 +7355,7 @@ H5C_flush_invalidate_cache(H5F_t * f, * of pinned entries from pass to pass. If it stops * shrinking before it hits zero, we scream and die. */ - /* if the flush function on the entry we last evicted + /* if the serialize function on the entry we last evicted * loaded an entry into cache (as Quincey has promised me * it never will), and if the cache was full, it is * possible that *next_entry_ptr was flushed or evicted. @@ -8069,8 +7364,13 @@ H5C_flush_invalidate_cache(H5F_t * f, * test is triggred, we are accessing a deallocated piece * of dynamically allocated memory, so we just scream and * die. + * + * Update: The code to restart the scan after flushes + * of dirty entries should make it impossible + * to satisfy the following test. Leave it in + * in case I am wrong. + * -- JRM */ -#ifndef NDEBUG if ( ( next_entry_ptr != NULL ) && ( next_entry_ptr->magic != H5C__H5C_CACHE_ENTRY_T_MAGIC ) ) { @@ -8081,7 +7381,6 @@ H5C_flush_invalidate_cache(H5F_t * f, HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ "next_entry_ptr->magic is invalid?!?!?.") } -#endif /* NDEBUG */ } /* end while loop scanning hash table bin */ } /* end for loop scanning hash table */ @@ -8099,7 +7398,6 @@ H5C_flush_invalidate_cache(H5F_t * f, curr_flush_dep_height++; } /* end while loop over flush dependency heights */ -end_of_inner_loop: old_pel_len = cur_pel_len; cur_pel_len = cache_ptr->pel_len; @@ -8167,7 +7465,7 @@ done: /*------------------------------------------------------------------------- * - * Function: H5C_flush_single_entry + * Function: H5C__flush_single_entry * * Purpose: Flush or clear (and evict if requested) the cache entry * with the specified address and type. If the type is NULL, @@ -8177,22 +7475,9 @@ done: * Attempts to flush a protected entry will result in an * error. * - * *first_flush_ptr should be true if only one - * flush is contemplated before the next load, or if this - * is the first of a sequence of flushes that will be - * completed before the next load. *first_flush_ptr is set - * to false if a flush actually takes place, and should be - * left false until the end of the sequence. - * - * The primary_dxpl_id is used if *first_flush_ptr is TRUE - * on entry, and a flush actually takes place. The - * secondary_dxpl_id is used in any subsequent flush where - * *first_flush_ptr is FALSE on entry. - * * If the H5C__FLUSH_INVALIDATE_FLAG flag is set, the entry will - * be cleared and not flushed -- in the case *first_flush_ptr, - * primary_dxpl_id, and secondary_dxpl_id are all irrelevent, - * and the call can't be part of a sequence of flushes. + * be cleared and not flushed, and the call can't be part of a + * sequence of flushes. * * If the caller knows the address of the skip list node at * which the target entry resides, it can avoid a lookup @@ -8209,41 +7494,73 @@ done: * * Programmer: John Mainzer, 5/5/04 * + * Changes: Refactored function to remove the type_ptr parameter. + * + * JRM -- 8/7/14 + * + * Added code to check for slist changes in pre_serialize and + * serialize calls, and set + * cache_ptr->slist_change_in_pre_serialize and + * cache_ptr->slist_change_in_serialize as appropriate. + * + * JRM -- 12/13/14 + * + * Refactored function to delay all modifications of the + * metadata cache data structures until after any calls + * to the pre-serialize or serialize callbacks. + * + * Need to do this, as some pre-serialize or serialize + * calls result in calls to the metadata cache and + * modifications to its data structures. Thus, at the + * time of any such call, the target entry flags and + * the metadata cache must all be consistant. + * + * Also added the entry_size_change_ptr parameter, which + * allows the function to report back any change in the size + * of the entry during the flush. Such size changes may + * occur during the pre-serialize callback. + * + * JRM -- 12/24/14 + * *------------------------------------------------------------------------- */ -static herr_t -H5C_flush_single_entry(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, - const H5C_class_t * type_ptr, - haddr_t addr, - unsigned flags, - hbool_t * first_flush_ptr, - hbool_t del_entry_from_slist_on_destroy) +herr_t +H5C__flush_single_entry(const H5F_t *f, hid_t dxpl_id, H5C_cache_entry_t *entry_ptr, + unsigned flags, int64_t *entry_size_change_ptr) { - H5C_t * cache_ptr = f->shared->cache; + H5C_t * cache_ptr; /* Cache for file */ hbool_t destroy; /* external flag */ hbool_t clear_only; /* external flag */ + hbool_t free_file_space; /* external flag */ hbool_t take_ownership; /* external flag */ + hbool_t del_from_slist_on_destroy; /* external flag */ + hbool_t write_entry; /* internal flag */ + hbool_t destroy_entry; /* internal flag */ hbool_t was_dirty; - hbool_t destroy_entry; - herr_t status; - unsigned flush_flags = H5C_CALLBACK__NO_FLAGS_SET; - H5C_cache_entry_t * entry_ptr = NULL; + haddr_t new_addr = HADDR_UNDEF; + haddr_t old_addr = HADDR_UNDEF; + size_t new_len = 0; + size_t new_compressed_len = 0; herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_PACKAGE - HDassert( f ); - HDassert( cache_ptr ); - HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HDassert( H5F_addr_defined(addr) ); - HDassert( first_flush_ptr ); + HDassert(f); + cache_ptr = f->shared->cache; + HDassert(cache_ptr); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(entry_ptr); + + /* If defined, initialize *entry_size_change_ptr to 0 */ + if(entry_size_change_ptr != NULL) + *entry_size_change_ptr = 0; /* setup external flags from the flags parameter */ destroy = ((flags & H5C__FLUSH_INVALIDATE_FLAG) != 0); clear_only = ((flags & H5C__FLUSH_CLEAR_ONLY_FLAG) != 0); + free_file_space = ((flags & H5C__FREE_FILE_SPACE_FLAG) != 0); take_ownership = ((flags & H5C__TAKE_OWNERSHIP_FLAG) != 0); + del_from_slist_on_destroy = ((flags & H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG) != 0); /* Set the flag for destroying the entry, based on the 'take ownership' * and 'destroy' flags @@ -8253,363 +7570,592 @@ H5C_flush_single_entry(H5F_t * f, else destroy_entry = destroy; - /* attempt to find the target entry in the hash table */ - H5C__SEARCH_INDEX(cache_ptr, addr, entry_ptr, FAIL) + /* we will write the entry to disk if it exists, is dirty, and if the + * clear only flag is not set. + */ + if(entry_ptr->is_dirty && !clear_only) + write_entry = TRUE; + else + write_entry = FALSE; /* run initial sanity checks */ #if H5C_DO_SANITY_CHECKS - if ( entry_ptr != NULL ) { + HDassert( ! ( destroy && entry_ptr->is_pinned ) ); - HDassert( ! ( ( destroy ) && ( entry_ptr->is_pinned ) ) ); + if(entry_ptr->in_slist) { + HDassert(entry_ptr->is_dirty); - if ( entry_ptr->in_slist ) { - - if ( ( ( entry_ptr->flush_marker ) && ( ! entry_ptr->is_dirty ) ) || - ( entry_ptr->addr != addr ) ) { - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "entry in slist failed sanity checks.") - } - } else { - - if ( ( entry_ptr->is_dirty ) || - ( entry_ptr->flush_marker ) || - ( entry_ptr->addr != addr ) ) { + if((entry_ptr->flush_marker) && (!entry_ptr->is_dirty)) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry in slist failed sanity checks.") + } else { + HDassert(!entry_ptr->is_dirty); + HDassert(!entry_ptr->flush_marker); - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "entry failed sanity checks.") - } - } + if((entry_ptr->is_dirty) || (entry_ptr->flush_marker)) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "entry failed sanity checks.") } #endif /* H5C_DO_SANITY_CHECKS */ - if ( ( entry_ptr != NULL ) && ( entry_ptr->is_protected ) ) - { + if(entry_ptr->is_protected) { + HDassert(!entry_ptr->is_protected); /* Attempt to flush a protected entry -- scream and die. */ - HGOTO_ERROR(H5E_CACHE, H5E_PROTECT, FAIL, \ - "Attempt to flush a protected entry.") - } + HGOTO_ERROR(H5E_CACHE, H5E_PROTECT, FAIL, "Attempt to flush a protected entry.") + } /* end if */ - if ( ( entry_ptr != NULL ) && - ( ( type_ptr == NULL ) || ( type_ptr->id == entry_ptr->type->id ) ) ) - { - /* we have work to do */ + /* set entry_ptr->flush_in_progress = TRUE and set + * entry_ptr->flush_marker = FALSE + * + * in the parallel case, do some sanity checking in passing. + */ + HDassert(entry_ptr->type); - /* We will set flush_in_progress back to FALSE at the end if the - * entry still exists at that point. - */ - entry_ptr->flush_in_progress = TRUE; + was_dirty = entry_ptr->is_dirty; /* needed later for logging */ + + /* We will set flush_in_progress back to FALSE at the end if the + * entry still exists at that point. + */ + entry_ptr->flush_in_progress = TRUE; + entry_ptr->flush_marker = FALSE; #ifdef H5_HAVE_PARALLEL #ifndef NDEBUG - /* If MPI based VFD is used, do special parallel I/O sanity checks. - * Note that we only do these sanity checks when the clear_only flag - * is not set, and the entry to be flushed is dirty. Don't bother - * otherwise as no file I/O can result. - */ - if(!clear_only && entry_ptr->is_dirty && - H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { - H5P_genplist_t *dxpl; /* Dataset transfer property list */ - unsigned coll_meta; /* Collective metadata write flag */ + /* If MPI based VFD is used, do special parallel I/O sanity checks. + * Note that we only do these sanity checks when the clear_only flag + * is not set, and the entry to be flushed is dirty. Don't bother + * otherwise as no file I/O can result. + */ + if(!clear_only && entry_ptr->is_dirty && H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) { + H5P_genplist_t *dxpl; /* Dataset transfer property list */ + unsigned coll_meta; /* Collective metadata write flag */ - /* Get the dataset transfer property list */ - if(NULL == (dxpl = H5I_object(primary_dxpl_id))) - HGOTO_ERROR(H5E_CACHE, H5E_BADTYPE, FAIL, "not a dataset transfer property list") + /* Get the dataset transfer property list */ + if(NULL == (dxpl = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_CACHE, H5E_BADTYPE, FAIL, "not a dataset transfer property list") - /* Get the collective metadata write property */ - if(H5P_get(dxpl, H5AC_COLLECTIVE_META_WRITE_NAME, &coll_meta) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't retrieve xfer mode") + /* Get the collective metadata write property */ + if(H5P_get(dxpl, H5AC_COLLECTIVE_META_WRITE_NAME, &coll_meta) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't retrieve xfer mode") - /* Sanity check collective metadata write flag */ - HDassert(coll_meta); - } /* end if */ + /* Sanity check collective metadata write flag */ + HDassert(coll_meta); + } /* end if */ #endif /* NDEBUG */ #endif /* H5_HAVE_PARALLEL */ - was_dirty = entry_ptr->is_dirty; + /* serialize the entry if necessary, and then write it to disk. */ + if(write_entry) { + unsigned serialize_flags = H5C__SERIALIZE_NO_FLAGS_SET; - entry_ptr->flush_marker = FALSE; + /* The entry is dirty, and we are doing either a flush, + * or a flush destroy. In either case, serialize the + * entry and write it to disk. + * + * Note that this may cause the entry to be re-sized and/or + * moved in the cache. + * + * As we will not update the metadata cache's data structures + * until we we finish the write, we must touch up these + * data structures for size and location changes even if we + * are about to delete the entry from the cache (i.e. on a + * flush destroy). + */ + HDassert(entry_ptr->is_dirty); - if ( clear_only ) { - H5C__UPDATE_STATS_FOR_CLEAR(cache_ptr, entry_ptr) - } else { - H5C__UPDATE_STATS_FOR_FLUSH(cache_ptr, entry_ptr) - } +#if H5C_DO_SANITY_CHECKS + if(cache_ptr->check_write_permitted && !(cache_ptr->write_permitted)) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Write when writes are always forbidden!?!?!") +#endif /* H5C_DO_SANITY_CHECKS */ - if ( destroy ) { - H5C__UPDATE_STATS_FOR_EVICTION(cache_ptr, entry_ptr) - } + if(NULL == entry_ptr->image_ptr) { + size_t image_size; - /* If the entry's type has a 'notify' callback and the entry is about - * to be removed from the cache, send a 'before eviction' notice while - * the entry is still fully integrated in the cache. - */ - if(destroy) { - if(entry_ptr->type->notify && - (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_BEFORE_EVICT, entry_ptr) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "can't notify client about entry to evict") + if(entry_ptr->compressed) + image_size = entry_ptr->compressed_size; + else + image_size = entry_ptr->size; + HDassert(image_size > 0); + + + if(NULL == (entry_ptr->image_ptr = H5MM_malloc(image_size + H5C_IMAGE_EXTRA_SPACE))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for on disk image buffer") +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + image_size, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ } /* end if */ - /* Always remove the entry from the hash table on a destroy. On a - * flush with destroy, it is cheaper to discard the skip list all at - * once rather than remove the entries one by one, so we only delete - * from the slist only if requested. - * - * We must do deletions now as the callback routines will free the - * entry if destroy is true. - * - * Note that it is possible that the entry will be moved during - * its call to flush. This will upset H5C_move_entry() if we - * don't tell it that it doesn't have to worry about updating the - * index and SLIST. Use the destroy_in_progress field for this - * purpose. - */ - if ( destroy ) { + if(!(entry_ptr->image_up_to_date)) { + /* reset cache_ptr->slist_changed so we can detect slist + * modifications in the pre_serialize call. + */ + cache_ptr->slist_changed = FALSE; + + /* make note of the entry's current address */ + old_addr = entry_ptr->addr; + + /* Call client's pre-serialize callback, if there's one */ + if ( ( entry_ptr->type->pre_serialize != NULL ) && + ( (entry_ptr->type->pre_serialize)(f, dxpl_id, + (void *)entry_ptr, + entry_ptr->addr, + entry_ptr->size, + entry_ptr->compressed_size, + &new_addr, &new_len, + &new_compressed_len, + &serialize_flags) < 0 ) ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to pre-serialize entry") + + /* set cache_ptr->slist_change_in_pre_serialize if the + * slist was modified. + */ + if(cache_ptr->slist_changed) + cache_ptr->slist_change_in_pre_serialize = TRUE; + + /* Check for any flags set in the pre-serialize callback */ + if(serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET) { + /* Check for unexpected flags from serialize callback */ + if(serialize_flags & ~(H5C__SERIALIZE_RESIZED_FLAG | + H5C__SERIALIZE_MOVED_FLAG | + H5C__SERIALIZE_COMPRESSED_FLAG)) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unknown serialize flag(s)") +#ifdef H5_HAVE_PARALLEL + /* In the parallel case, resizes and moves in + * the serialize operation can cause problems. + * If they occur, scream and die. + * + * At present, in the parallel case, the aux_ptr + * will only be set if there is more than one + * process. Thus we can use this to detect + * the parallel case. + * + * This works for now, but if we start using the + * aux_ptr for other purposes, we will have to + * change this test accordingly. + * + * NB: While this test detects entryies that attempt + * to resize or move themselves during a flush + * in the parallel case, it will not detect an + * entry that dirties, resizes, and/or moves + * other entries during its flush. + * + * From what Quincey tells me, this test is + * sufficient for now, as any flush routine that + * does the latter will also do the former. + * + * If that ceases to be the case, further + * tests will be necessary. + */ + if(cache_ptr->aux_ptr != NULL) + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "resize/move in serialize occured in parallel case.") +#endif /* H5_HAVE_PARALLEL */ - entry_ptr->destroy_in_progress = TRUE; + /* Resize the buffer if required */ + if ( ( ( ! entry_ptr->compressed ) && + ( serialize_flags & H5C__SERIALIZE_RESIZED_FLAG ) ) || + ( ( entry_ptr->compressed ) && + ( serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG ) ) ) + { + size_t new_image_size; + + if(entry_ptr->compressed) + new_image_size = new_compressed_len; + else + new_image_size = new_len; + HDassert(new_image_size > 0); + + /* Release the current image */ + if(entry_ptr->image_ptr) + entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr); + + /* Allocate a new image buffer */ + if(NULL == (entry_ptr->image_ptr = H5MM_malloc(new_image_size + H5C_IMAGE_EXTRA_SPACE))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for on disk image buffer") +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(((uint8_t *)entry_ptr->image_ptr) + new_image_size, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + } /* end if */ - H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr) + /* If required, update the entry and the cache data structures + * for a resize. + */ + if(serialize_flags & H5C__SERIALIZE_RESIZED_FLAG) { + H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, \ + entry_ptr, new_len) + + /* update the hash table for the size change*/ + H5C__UPDATE_INDEX_FOR_SIZE_CHANGE(cache_ptr, \ + entry_ptr->size, \ + new_len, entry_ptr, \ + !(entry_ptr->is_dirty)); + + /* The entry can't be protected since we are + * in the process of flushing it. Thus we must + * update the replacement policy data + * structures for the size change. The macro + * deals with the pinned case. + */ + H5C__UPDATE_RP_FOR_SIZE_CHANGE(cache_ptr, entry_ptr, new_len); - if ( ( entry_ptr->in_slist ) && - ( del_entry_from_slist_on_destroy ) ) { + /* as we haven't updated the cache data structures for + * for the flush or flush destroy yet, the entry should + * be in the slist. Thus update it for the size change. + */ + HDassert(entry_ptr->in_slist); + H5C__UPDATE_SLIST_FOR_SIZE_CHANGE(cache_ptr, entry_ptr->size, \ + new_len) - H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) - } - } + /* if defined, update *entry_size_change_ptr for the + * change in entry size. + */ + if(entry_size_change_ptr != NULL) + *entry_size_change_ptr = (int64_t)new_len - (int64_t)(entry_ptr->size); - /* Update the replacement policy for the flush or eviction. - * Again, do this now so we don't have to reference freed - * memory in the destroy case. - */ - if ( destroy ) { /* AKA eviction */ + /* finally, update the entry for its new size */ + entry_ptr->size = new_len; + } /* end if */ - H5C__UPDATE_RP_FOR_EVICTION(cache_ptr, entry_ptr, FAIL) + /* If required, udate the entry and the cache data structures + * for a move + */ + if(serialize_flags & H5C__SERIALIZE_MOVED_FLAG) { +#if H5C_DO_SANITY_CHECKS + int64_t saved_slist_len_increase; + int64_t saved_slist_size_increase; +#endif /* H5C_DO_SANITY_CHECKS */ - } else { + H5C__UPDATE_STATS_FOR_MOVE(cache_ptr, entry_ptr) - H5C__UPDATE_RP_FOR_FLUSH(cache_ptr, entry_ptr, FAIL) - } + if(entry_ptr->addr == old_addr) { + /* we must update cache data structures for the + * change in address. + */ - /* Clear the dirty flag only, if requested */ - if ( clear_only ) { + /* delete the entry from the hash table and the slist */ + H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr) + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) - if ( destroy ) { -#ifndef NDEBUG - /* we are about to call the clear callback with the - * destroy flag set -- this will result in *entry_ptr - * being freed. Set the magic field to bad magic - * so we can detect a freed cache entry if we see - * one. - */ - entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC; -#endif /* NDEBUG */ - entry_ptr->cache_ptr = NULL; - } - /* Call the callback routine to clear all dirty flags for object */ - if ( (entry_ptr->type->clear)(f, entry_ptr, destroy_entry) < 0 ) { + /* update the entry for its new address */ + entry_ptr->addr = new_addr; - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear entry") - } - } else { + /* and then reinsert in the index and slist */ + H5C__INSERT_IN_INDEX(cache_ptr, entry_ptr, FAIL) #if H5C_DO_SANITY_CHECKS - if ( ( entry_ptr->is_dirty ) && - ( cache_ptr->check_write_permitted == NULL ) && - ( ! (cache_ptr->write_permitted) ) ) + /* save cache_ptr->slist_len_increase and + * cache_ptr->slist_size_increase before the + * reinsertion into the slist, and restore + * them afterwards to avoid skewing our sanity + * checking. + */ + saved_slist_len_increase = cache_ptr->slist_len_increase; + saved_slist_size_increase = cache_ptr->slist_size_increase; +#endif /* H5C_DO_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "Write when writes are always forbidden!?!?!") + H5C__INSERT_ENTRY_IN_SLIST(cache_ptr, entry_ptr, FAIL) + +#if H5C_DO_SANITY_CHECKS + cache_ptr->slist_len_increase = saved_slist_len_increase; + cache_ptr->slist_size_increase = saved_slist_size_increase; #endif /* H5C_DO_SANITY_CHECKS */ + } + else /* move is alread done for us -- just do sanity checks */ + HDassert(entry_ptr->addr == new_addr); + } /* end if */ - if ( destroy ) { -#ifndef NDEBUG - /* we are about to call the flush callback with the - * destroy flag set -- this will result in *entry_ptr - * being freed. Set the magic field to bad magic - * so we can detect a freed cache entry if we see - * one. - */ - entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC; -#endif /* NDEBUG */ - entry_ptr->cache_ptr = NULL; - } + if(serialize_flags & H5C__SERIALIZE_COMPRESSED_FLAG) { + /* just save the new compressed entry size in + * entry_ptr->compressed_size. We don't need to + * do more, as compressed size is only used for I/O. + */ + HDassert(entry_ptr->compressed); + entry_ptr->compressed_size = new_compressed_len; + } + } /* end if ( serialize_flags != H5C__SERIALIZE_NO_FLAGS_SET ) */ - /* Only block for all the processes on the first piece of metadata - */ + /* Serialize object into buffer */ + { + size_t image_len; - if ( *first_flush_ptr && entry_ptr->is_dirty ) { + if(entry_ptr->compressed) + image_len = entry_ptr->compressed_size; + else + image_len = entry_ptr->size; - status = (entry_ptr->type->flush)(f, primary_dxpl_id, destroy_entry, - entry_ptr->addr, entry_ptr, - &flush_flags); - *first_flush_ptr = FALSE; + /* reset cache_ptr->slist_changed so we can detect slist + * modifications in the serialize call. + */ + cache_ptr->slist_changed = FALSE; - } else { + + if(entry_ptr->type->serialize(f, entry_ptr->image_ptr, + image_len, (void *)entry_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to serialize entry") - status = (entry_ptr->type->flush)(f, secondary_dxpl_id, - destroy_entry, entry_ptr->addr, - entry_ptr, &flush_flags); - } + /* set cache_ptr->slist_change_in_serialize if the + * slist was modified. + */ + if(cache_ptr->slist_changed) + cache_ptr->slist_change_in_pre_serialize = TRUE; - if ( status < 0 ) { +#if H5C_DO_MEMORY_SANITY_CHECKS + HDassert(0 == HDmemcmp(((uint8_t *)entry_ptr->image_ptr) + image_len, + H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE)); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "unable to flush entry") + entry_ptr->image_up_to_date = TRUE; } + } /* end if ( ! (entry_ptr->image_up_to_date) ) */ + + /* Finally, write the image to disk. + * + * Note that if either the H5C__CLASS_NO_IO_FLAG or the + * the H5AC__CLASS_SKIP_WRITES flag is set in the + * in the entry's type, we silently skip the write. This + * flag should only be used in test code. + */ + if ( ( ((entry_ptr->type->flags) & H5C__CLASS_NO_IO_FLAG) == 0 ) && + ( ((entry_ptr->type->flags) & H5C__CLASS_SKIP_WRITES) == 0 ) ) + { + /* If compression is not enabled, the size of the entry on + * disk is entry_prt->size. However if entry_ptr->compressed + * is TRUE, the on disk size is entry_ptr->compressed_size. + */ + size_t image_size; -#ifdef H5_HAVE_PARALLEL - if ( flush_flags != H5C_CALLBACK__NO_FLAGS_SET ) { - - /* In the parallel case, flush operations can - * cause problems. If they occur, scream and - * die. - * - * At present, in the parallel case, the aux_ptr - * will only be set if there is more than one - * process. Thus we can use this to detect - * the parallel case. - * - * This works for now, but if we start using the - * aux_ptr for other purposes, we will have to - * change this test accordingly. - * - * NB: While this test detects entryies that attempt - * to resize or move themselves during a flush - * in the parallel case, it will not detect an - * entry that dirties, resizes, and/or moves - * other entries during its flush. - * - * From what Quincey tells me, this test is - * sufficient for now, as any flush routine that - * does the latter will also do the former. - * - * If that ceases to be the case, further - * tests will be necessary. - */ - if ( cache_ptr->aux_ptr != NULL ) - - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "resize/move in serialize occured in parallel case.") + if(entry_ptr->compressed) + image_size = entry_ptr->compressed_size; + else + image_size = entry_ptr->size; - } -#endif /* H5_HAVE_PARALLEL */ + if(H5F_block_write(f, entry_ptr->type->mem_type, entry_ptr->addr, + image_size, dxpl_id, entry_ptr->image_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't write image to file.") } - if ( ( ! destroy ) && ( entry_ptr->in_slist ) ) { + /* if the entry has a notify callback, notify it that we have + * just flushed the entry. + */ + if(entry_ptr->type->notify && + (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_AFTER_FLUSH, entry_ptr) < 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "can't notify client of entry flush") + } /* if ( write_entry ) */ - H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) + /* At this point, all pre-serialize and serialize calls have been + * made if it was appropriate to make them. Similarly, the entry + * has been written to disk if desired. + * + * Thus it is now safe to update the cache data structures for the + * flush. + */ - } + /* start by updating the statistics */ + if(clear_only) { + /* only log a clear if the entry was dirty */ + if(was_dirty) { + H5C__UPDATE_STATS_FOR_CLEAR(cache_ptr, entry_ptr) + } /* end if */ + } else if(write_entry) { + HDassert(was_dirty); - if ( ( ! destroy ) && ( was_dirty ) ) { + /* only log a flush if we actually wrote to disk */ + H5C__UPDATE_STATS_FOR_FLUSH(cache_ptr, entry_ptr) + } - H5C__UPDATE_INDEX_FOR_ENTRY_CLEAN(cache_ptr, entry_ptr); - } + if(destroy) { + if(take_ownership) + HDassert(!destroy_entry); + else + HDassert(destroy_entry); - if ( ! destroy ) { /* i.e. if the entry still exists */ + H5C__UPDATE_STATS_FOR_EVICTION(cache_ptr, entry_ptr, take_ownership) + } - HDassert( !(entry_ptr->is_dirty) ); - HDassert( !(entry_ptr->flush_marker) ); - HDassert( !(entry_ptr->in_slist) ); - HDassert( !(entry_ptr->is_protected) ); - HDassert( !(entry_ptr->is_read_only) ); - HDassert( (entry_ptr->ro_ref_count) == 0 ); + /* If the entry's type has a 'notify' callback and the entry is about + * to be removed from the cache, send a 'before eviction' notice while + * the entry is still fully integrated in the cache. + */ + if(destroy) + if(entry_ptr->type->notify && (entry_ptr->type->notify)(H5C_NOTIFY_ACTION_BEFORE_EVICT, entry_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTNOTIFY, FAIL, "can't notify client about entry to evict") + + /* Update the cache internal data structures. */ + if(destroy) { + /* Update the cache internal data structures as appropriate + * for a destroy. Specifically: + * + * 1) Delete it from the index + * + * 2) Delete it from the skip list if requested. + * + * 3) Update the replacement policy for eviction + * + * Finally, if the destroy_entry flag is set, discard the + * entry. + */ - if ( (flush_flags & H5C_CALLBACK__SIZE_CHANGED_FLAG) != 0 ) { + H5C__DELETE_FROM_INDEX(cache_ptr, entry_ptr) - /* The entry size changed as a result of the flush. - * - * Most likely, the entry was compressed, and the - * new version is of a different size than the old. - * - * In any case, we must update entry and cache size - * accordingly. - */ - size_t new_size; + if(entry_ptr->in_slist && del_from_slist_on_destroy) + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) - if ( (entry_ptr->type->size)(f, (void *)entry_ptr, &new_size) - < 0 ) { + H5C__UPDATE_RP_FOR_EVICTION(cache_ptr, entry_ptr, FAIL) + } + else { + HDassert(clear_only || write_entry); + HDassert(entry_ptr->is_dirty); + HDassert(entry_ptr->in_slist); - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, FAIL, \ - "Can't get entry size after flush") - } + /* We are either doing a flush or a clear. + * + * A clear and a flush are the same from the point of + * view of the replacement policy and the slist. + * Hence no differentiation between them. + * + * JRM -- 7/7/07 + */ - if ( new_size != entry_ptr->size ) { + H5C__UPDATE_RP_FOR_FLUSH(cache_ptr, entry_ptr, FAIL) - HDassert( entry_ptr->size < H5C_MAX_ENTRY_SIZE ); + H5C__REMOVE_ENTRY_FROM_SLIST(cache_ptr, entry_ptr) - /* update the hash table for the size change - * We pass TRUE as the was_clean parameter, as we - * have already updated the clean and dirty index - * size fields for the fact that the entry has - * been flushed. (See above call to - * H5C__UPDATE_INDEX_FOR_ENTRY_CLEAN()). - */ - H5C__UPDATE_INDEX_FOR_SIZE_CHANGE((cache_ptr), \ - (entry_ptr->size), \ - (new_size), \ - (entry_ptr), \ - (TRUE)) - - /* The entry can't be protected since we just flushed it. - * Thus we must update the replacement policy data - * structures for the size change. The macro deals - * with the pinned case. - */ - H5C__UPDATE_RP_FOR_SIZE_CHANGE(cache_ptr, entry_ptr, \ - new_size) + /* mark the entry as clean and update the index for + * entry clean. Also, call the clear callback + * if defined. + */ + entry_ptr->is_dirty = FALSE; - /* The entry can't be in the slist, so no need to update - * the slist for the size change. - */ + H5C__UPDATE_INDEX_FOR_ENTRY_CLEAN(cache_ptr, entry_ptr); - /* update stats for the size change */ - H5C__UPDATE_STATS_FOR_ENTRY_SIZE_CHANGE(cache_ptr, \ - entry_ptr, \ - new_size) + if(entry_ptr->type->clear && (entry_ptr->type->clear)(f, (void *)entry_ptr, FALSE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to clear entry") + } - /* finally, update the entry size proper */ - entry_ptr->size = new_size; - } - } + /* reset the flush_in progress flag */ + entry_ptr->flush_in_progress = FALSE; - if ( (flush_flags & H5C_CALLBACK__MOVED_FLAG) != 0 ) { - - /* The entry was moved as the result of the flush. - * - * Most likely, the entry was compressed, and the - * new version is larger than the old and thus had - * to be relocated. - * - * At preset, all processing for this case is - * handled elsewhere. But lets keep the if statement - * around just in case. - */ + /* Internal cache data structures should now be up to date, and + * consistant with the status of the entry. + * + * Now discard the entry if appropriate. + */ + if(destroy) { + /* start by freeing the buffer for the on disk image */ + if(entry_ptr->image_ptr != NULL) + entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr); - } + /* Check whether we should free the space in the file that + * the entry occupies + */ + if(free_file_space) { + size_t fsf_size; - /* reset the flush_in progress flag */ - entry_ptr->flush_in_progress = FALSE; - } + /* Sanity checks */ + HDassert(H5F_addr_defined(entry_ptr->addr)); + HDassert(!H5F_IS_TMP_ADDR(f, entry_ptr->addr)); +#ifndef NDEBUG +{ + hbool_t curr_compressed = FALSE; + size_t curr_len; + size_t curr_compressed_len = 0; + + /* Get the actual image size for the thing again */ + entry_ptr->type->image_len((void *)entry_ptr, &curr_len, &curr_compressed, &curr_compressed_len); + HDassert(curr_len == entry_ptr->size); + HDassert(curr_compressed == entry_ptr->compressed); + HDassert(curr_compressed_len == entry_ptr->compressed_size); +} +#endif /* NDEBUG */ + + /* if the file space free size callback is defined, use + * it to get the size of the block of file space to free. + * Otherwise use entry_ptr->compressed_size if + * entry_ptr->compressed == TRUE, and entry_ptr->size + * if entry_ptr->compressed == FALSE. + */ + if(entry_ptr->type->fsf_size) { + if((entry_ptr->type->fsf_size)((void *)entry_ptr, &fsf_size) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "unable to get file space free size") + } /* end if */ + else if(entry_ptr->compressed) /* use compressed size */ + fsf_size = entry_ptr->compressed_size; + else /* no file space free size callback -- use entry size */ + fsf_size = entry_ptr->size; + + /* Release the space on disk */ + if(H5MF_xfree(f, entry_ptr->type->mem_type, dxpl_id, entry_ptr->addr, (hsize_t)fsf_size) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFREE, FAIL, "unable to free file space for cache entry") + } /* end if ( free_file_space ) */ + + /* Reset the pointer to the cache the entry is within. -QAK */ + entry_ptr->cache_ptr = NULL; + + /* increment entries_removed_counter and set + * last_entry_removed_ptr. As we are likely abuut to + * free the entry, recall that last_entry_removed_ptr + * must NEVER be dereferenced. + * + * Recall that these fields are maintained to allow functions + * that perform scans of lists of entries to detect the + * unexpected removal of entries (via expunge, eviction, + * or take ownership at present), so that they can re-start + * their scans if necessary. + */ + cache_ptr->last_entry_removed_ptr++; + cache_ptr->last_entry_removed_ptr = entry_ptr; + + /* Check for actually destroying the entry in memory */ + /* (As opposed to taking ownership of it) */ + if(destroy_entry) { + /* if the entry is dirty and it has a clear callback, + * call this callback now. Since this callback exists, + * it follows tht the client maintains its own dirty bits, + * which must be cleared before the entry is freed to avoid + * sanity check failures. Also clear the dirty flag for + * the same reason. + */ + if(entry_ptr->is_dirty) { + entry_ptr->is_dirty = FALSE; - if ( cache_ptr->log_flush ) { + if(entry_ptr->type->clear && (entry_ptr->type->clear)(f, (void *)entry_ptr, TRUE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to clear entry") + } - status = (cache_ptr->log_flush)(cache_ptr, addr, was_dirty, flags); + /* we are about to discard the in core representation -- + * set the magic field to bad magic so we can detect a + * freed entry if we see one. + */ + entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC; - if ( status < 0 ) { + /* verify that the image has been freed */ + HDassert(entry_ptr->image_ptr == NULL); - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "log_flush callback failed.") - } + if(entry_ptr->type->free_icr((void *)entry_ptr) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "free_icr callback failed.") + } + else { + HDassert(take_ownership); + + /* client is taking ownership of the entry. + * set bad magic here too so the cache will choke + * unless the entry is re-inserted properly + */ + entry_ptr->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC; } - } + } /* if (destroy) */ + + if(cache_ptr->log_flush) + if((cache_ptr->log_flush)(cache_ptr, entry_ptr->addr, was_dirty, flags) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "log_flush callback failed.") done: - HDassert( ( destroy ) || - ( ( entry_ptr ) && ( ! entry_ptr->flush_in_progress ) ) ); + HDassert( ( ret_value != SUCCEED ) || ( destroy_entry ) || + ( ! entry_ptr->flush_in_progress ) ); + + HDassert( ( ret_value != SUCCEED ) || ( destroy_entry ) || + ( take_ownership ) || ( ! entry_ptr->is_dirty ) ); + FUNC_LEAVE_NOAPI(ret_value) -} /* H5C_flush_single_entry() */ +} /* H5C__flush_single_entry() */ /*------------------------------------------------------------------------- @@ -8637,8 +8183,19 @@ H5C_load_entry(H5F_t * f, haddr_t addr, void * udata) { + hbool_t dirty = FALSE; /* Flag indicating whether thing was dirtied during deserialize */ + hbool_t compressed = FALSE; /* flag indicating whether thing */ + /* will be run through filters on */ + /* on read and write. Usually FALSE */ + /* set to true if appropriate. */ + size_t compressed_size = 0; /* entry compressed size if */ + /* known -- otherwise uncompressed. */ + /* Zero indicates compression not */ + /* enabled. */ + void * image = NULL; /* Buffer for disk image */ void * thing = NULL; /* Pointer to thing loaded */ H5C_cache_entry_t * entry; /* Alias for thing loaded, as cache entry */ + size_t len; /* Size of image in file */ unsigned u; /* Local index variable */ void * ret_value; /* Return value */ @@ -8648,14 +8205,310 @@ H5C_load_entry(H5F_t * f, HDassert(f->shared); HDassert(f->shared->cache); HDassert(type); - HDassert(type->load); - HDassert(type->size); + + /* verify absence of prohibited or unsupported type flag combinations */ + HDassert(!(type->flags & H5C__CLASS_NO_IO_FLAG)); + + /* for now, we do not combine the speculative load and compressed flags */ + HDassert(!((type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) && + (type->flags & H5C__CLASS_COMPRESSED_FLAG))); + + /* Can't see how skip reads could be usefully combined with + * either the speculative read or compressed flags. Hence disallow. + */ + HDassert(!((type->flags & H5C__CLASS_SKIP_READS) && + (type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG))); + HDassert(!((type->flags & H5C__CLASS_SKIP_READS) && + (type->flags & H5C__CLASS_COMPRESSED_FLAG))); + HDassert(H5F_addr_defined(addr)); + HDassert(type->get_load_size); + HDassert(type->deserialize); + + /* Call the get_load_size callback, to retrieve the initial + * size of image + */ + if(type->get_load_size(udata, &len) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "can't retrieve image size") + + HDassert(len > 0); + + /* Check for possible speculative read off the end of the file */ + if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) { + +/* Quincey has added patches for eoa calculations -- leave the original + * code around until we see the effect of these patches. + * JRM -- 1/1/15 + */ +#if 0 /* original code */ /* JRM */ + /* the original version of this code has several problems: + * + * First, the sblock is not available until the sblock + * has been read in, which causes a seg fault. This is + * dealt with easily enough by testing to see if + * f->shared->sblock is NULL, and calling H5FD_get_base_addr() + * to obtain the base addr when it is. + * + * The second issue is more subtle. H5F_get_eoa() calls + * H5FD_get_eoa(). However, this function returns the EOA as + * a relative address -- i.e. relative to the base address. + * This means that the base addr + addr < eoa sanity check will + * fail whenever the super block is not at address 0 when + * reading in the first chunk of the super block. + * + * To address these issues, I have rewritten the code to + * simply verify that the address plus length is less than + * the eoa. I think this is sufficient, but further testing + * should tell me if it isn't. + * JRM -- 8/29/14 + */ + haddr_t eoa; /* End-of-allocation in the file */ + haddr_t base_addr; /* Base address of file data */ + + /* Get the file's end-of-allocation value */ + eoa = H5F_get_eoa(f, type->mem_type); + HDassert(H5F_addr_defined(eoa)); + + /* Get the file's base address */ + if ( f->shared->sblock ) + + base_addr = H5F_BASE_ADDR(f); + + else { /* sblock not loaded yet -- use file driver info */ + + HDassert(f->shared->lf); + base_addr = H5FD_get_base_addr(f->shared->lf); + + } + HDassert(H5F_addr_defined(base_addr)); + + /* Check for bad address in general */ + if((addr + base_addr) > eoa) + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "address of object past end of allocation") + + /* Check if the amount of data to read will be past the eoa */ + if((addr + base_addr + len) > eoa) + /* Trim down the length of the metadata */ + len = (size_t)(eoa - (addr + base_addr)); + +#else /* modified code */ /* JRM */ + + haddr_t eoa; /* End-of-allocation in the file */ + H5FD_mem_t cooked_type; + + /* if type == H5FD_MEM_GHEAP, H5F_block_read() forces + * type to H5FD_MEM_DRAW via its call to H5F__accum_read(). + * Thus we do the same for purposes of computing the eoa + * for sanity checks. + */ + cooked_type = + (type->mem_type == H5FD_MEM_GHEAP) ? H5FD_MEM_DRAW : type->mem_type; + + /* Get the file's end-of-allocation value */ + eoa = H5F_get_eoa(f, cooked_type); + + HDassert(H5F_addr_defined(eoa)); + + /* Check for bad address in general */ + if ( H5F_addr_gt(addr, eoa) ) + + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "address of object past end of allocation") + + /* Check if the amount of data to read will be past the eoa */ + if( H5F_addr_gt((addr + len), eoa) ) { + + /* Trim down the length of the metadata */ + + /* Note that for some cache clients, this will cause an + * assertion failure. JRM -- 8/29/14 + */ + len = (size_t)(eoa - addr); + } + + if ( len <= 0 ) + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, \ + "len not positive after adjustment for EOA.") + +#endif /* modified code */ /* JRM */ + } + /* Allocate the buffer for reading the on-disk entry image */ + if(NULL == (image = H5MM_malloc(len + H5C_IMAGE_EXTRA_SPACE))) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, \ + "memory allocation failed for on disk image buffer.") + +#if H5C_DO_MEMORY_SANITY_CHECKS + HDmemcpy(((uint8_t *)image) + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + /* Get the on-disk entry image */ + if ( 0 == (type->flags & H5C__CLASS_SKIP_READS) ) + if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*") + + /* Deserialize the on-disk image into the native memory form */ + if(NULL == (thing = type->deserialize(image, len, udata, &dirty))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't deserialize image") + + /* If the client's cache has an image_len callback, check it */ + if(type->image_len) { + size_t new_len; /* New size of on-disk image */ + + /* set magic and type field in *entry_ptr. While the image_len + * callback shouldn't touch the cache specific fields, it may check + * these fields to ensure that it it has received the expected + * value. + * + * Note that this initialization is repeated below on the off + * chance that we had to re-try the deserialization. + */ + entry = (H5C_cache_entry_t *)thing; + entry->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; + entry->type = type; + + /* verify that compressed and compressed_len are initialized */ + HDassert(compressed == FALSE); + HDassert(compressed_size == 0); + + /* Get the actual image size for the thing */ + if(type->image_len(thing, &new_len, &compressed, &compressed_size) < 0) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, \ + "can't retrieve image length") + + if(new_len == 0) + + HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "image length is 0") + + HDassert(((type->flags & H5C__CLASS_COMPRESSED_FLAG) != 0) || + ((compressed == FALSE) && (compressed_size == 0))); + HDassert((compressed == TRUE) || (compressed_size == 0)); + + if(new_len != len) { + + if(type->flags & H5C__CLASS_COMPRESSED_FLAG) { + + /* if new_len != len, then compression must be + * enabled on the entry. In this case, the image_len + * callback should have set compressed to TRUE, set + * new_len equal to the uncompressed size of the + * entry, and compressed_len equal to the compressed + * size -- which must equal len. + * + * We can't verify the uncompressed size, but we can + * verify the rest with the following assertions. + */ + HDassert(compressed); + HDassert(compressed_size == len); + + /* new_len should contain the uncompressed size. Set len + * equal to new_len, so that the cache will use the + * uncompressed size for purposes of space allocation, etc. + */ + len = new_len; + + } else if (type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) { + + void *new_image; /* Buffer for disk image */ + + /* compressed must be FALSE, and compressed_size + * must be zero. + */ + HDassert(!compressed); + HDassert(compressed_size == 0); + + /* Adjust the size of the image to match new_len */ + if(NULL == (new_image = H5MM_realloc(image, + new_len + H5C_IMAGE_EXTRA_SPACE))) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, \ + "image null after H5MM_realloc()") + + image = new_image; + +#if H5C_DO_MEMORY_SANITY_CHECKS + + HDmemcpy(((uint8_t *)image) + new_len, + H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE); + +#endif /* H5C_DO_MEMORY_SANITY_CHECKS */ + + /* If the thing's image needs to be bigger for a speculatively + * loaded thing, free the thing and retry with new length + */ + if (new_len > len) { + + /* Release previous (possibly partially initialized) + * thing. Note that we must set entry->magic to + * H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC and set one or + * two other fields before the call to free_icr + * so as to avoid sanity check failures. + */ + entry->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC; + + entry->addr = addr; + + if ( type->free_icr(thing) < 0 ) - if(NULL == (thing = (type->load)(f, dxpl_id, addr, udata))) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, \ + "free_icr callback failed") - HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "unable to load entry") + /* Go get the on-disk image again */ + if(H5F_block_read(f, type->mem_type, addr, + new_len, dxpl_id, image) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, \ + "Can't read image") + + /* Deserialize on-disk image into native memory + * form again + */ + if(NULL == (thing = type->deserialize(image, new_len, + udata, &dirty))) + + HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, \ + "Can't deserialize image") + +#ifndef NDEBUG + { + /* new_compressed and new_compressed_size must be + * initialize to FALSE / 0 respectively, as clients + * that don't use compression may ignore these two + * parameters. + */ + hbool_t new_compressed = FALSE; + size_t new_compressed_size = 0; + size_t new_new_len; + + /* Get the actual image size for the thing again. Note + * that since this is a new thing, we have to set + * the magic and type fields again so as to avoid + * failing sanity checks. + */ + entry = (H5C_cache_entry_t *)thing; + entry->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; + entry->type = type; + + type->image_len(thing, &new_new_len, &new_compressed, &new_compressed_size); + HDassert(new_new_len == new_len); + HDassert(!new_compressed); + HDassert(new_compressed_size == 0); + } +#endif /* NDEBUG */ + } /* end if (new_len > len) */ + + /* Retain adjusted size */ + len = new_len; + + } else { /* throw an error */ + + HGOTO_ERROR(H5E_CACHE, H5E_UNSUPPORTED, NULL, \ + "size of non-speculative, non-compressed object changed") + } + } /* end if (new_len != len) */ + } /* end if */ entry = (H5C_cache_entry_t *)thing; @@ -8663,35 +8516,39 @@ H5C_load_entry(H5F_t * f, * * However, when this code is used in the metadata cache, it is * possible that object headers will be dirty at this point, as - * the load function will alter object headers if necessary to + * the deserialize function will alter object headers if necessary to * fix an old bug. * - * To support this bug fix, I have replace the old assert: - * - * HDassert( entry->is_dirty == FALSE ); + * In the following assert: * - * with: + * HDassert( ( dirty == FALSE ) || ( type->id == 5 || type->id == 6 ) ); * - * HDassert( ( entry->is_dirty == FALSE ) || ( type->id == 5 ) ); - * - * Note that type id 5 is associated with object headers in the metadata - * cache. + * note that type ids 5 & 6 are associated with object headers in the + * metadata cache. * * When we get to using H5C for other purposes, we may wish to * tighten up the assert so that the loophole only applies to the * metadata cache. */ - HDassert( ( entry->is_dirty == FALSE ) || ( type->id == 5 ) ); -#ifndef NDEBUG + HDassert( ( dirty == FALSE ) || ( type->id == 5 || type->id == 6) ); + entry->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC; -#endif /* NDEBUG */ entry->cache_ptr = f->shared->cache; entry->addr = addr; + entry->size = len; + HDassert(entry->size < H5C_MAX_ENTRY_SIZE); + entry->compressed = compressed; + entry->compressed_size = compressed_size; + entry->image_ptr = image; + entry->image_up_to_date = TRUE; entry->type = type; + entry->is_dirty = dirty; + entry->dirtied = FALSE; entry->is_protected = FALSE; entry->is_read_only = FALSE; entry->ro_ref_count = 0; + entry->is_pinned = FALSE; entry->in_slist = FALSE; entry->flush_marker = FALSE; #ifdef H5_HAVE_PARALLEL @@ -8700,13 +8557,6 @@ H5C_load_entry(H5F_t * f, #endif /* H5_HAVE_PARALLEL */ entry->flush_in_progress = FALSE; entry->destroy_in_progress = FALSE; - entry->free_file_space_on_destroy = FALSE; - - if((type->size)(f, thing, &(entry->size)) < 0) - - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTGETSIZE, NULL, "Can't get size of thing") - - HDassert( entry->size < H5C_MAX_ENTRY_SIZE ); /* Initialize flush dependency height fields */ entry->flush_dep_parent = NULL; @@ -8727,6 +8577,18 @@ H5C_load_entry(H5F_t * f, ret_value = thing; done: + /* Cleanup on error */ + if(NULL == ret_value) { + + /* Release resources */ + if ( thing && type->free_icr(thing) < 0 ) + + HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, \ + "free_icr callback failed") + + if(image) + image = H5MM_xfree(image); + } /* end if */ FUNC_LEAVE_NOAPI(ret_value) } /* H5C_load_entry() */ @@ -8765,39 +8627,47 @@ done: * * Programmer: John Mainzer, 5/14/04 * - * JRM -- 11/13/08 - * Modified function to always observe the min_clean_size - * whether we are maintaining the clean and dirt LRU lists - * or not. To do this, we had to add the new clean_index_size - * and dirty_index_size fields to H5C_t, and supporting code - * as needed throughout the cache. + * Changes: Modified function to skip over entries with the + * flush_in_progress flag set. If this is not done, + * an infinite recursion is possible if the cache is + * full, and the pre-serialize or serialize routine + * attempts to load another entry. * - * The purpose of this modification is to avoid "metadata - * blizzards" in the write only case. In such instances, - * the cache was allowed to fill with dirty metadata. When - * we finally needed to evict an entry to make space, we had - * to flush out a whole cache full of metadata -- which has - * interesting performance effects. We hope to avoid (or - * perhaps more accurately hide) this effect by maintaining - * the min_clean_size, which should force us to start flushing - * entries long before we actually have to evict something - * to make space. + * This error was exposed by a re-factor of the + * H5C__flush_single_entry() routine. However, it was + * a potential bug from the moment that entries were + * allowed to load other entries on flush. + * + * In passing, note that the primary and secondary dxpls + * mentioned in the comment above have been replaced by + * a single dxpl at some point, and thus the discussion + * above is somewhat obsolete. Date of this change is + * unkown. + * + * JRM -- 12/26/14 + * + * Modified function to detect deletions of entries + * during a scan of the LRU, and where appropriate, + * restart the scan to avoid proceeding with a next + * entry that is no longer in the cache. + * + * Note the absence of checks after flushes of clean + * entries. As a second entry can only be removed by + * by a call to the pre_serialize or serialize callback + * of the first, and as these callbacks will not be called + * on clean entries, no checks are needed. + * + * JRM -- 4/6/15 * - * MAM -- 01/06/09 - * Added code to maintain clean_entries_skipped and total_entries - * scanned statistics. *------------------------------------------------------------------------- */ static herr_t H5C_make_space_in_cache(H5F_t * f, - hid_t primary_dxpl_id, - hid_t secondary_dxpl_id, + hid_t dxpl_id, size_t space_needed, - hbool_t write_permitted, - hbool_t * first_flush_ptr) + hbool_t write_permitted) { H5C_t * cache_ptr = f->shared->cache; - herr_t result; #if H5C_COLLECT_CACHE_STATS int32_t clean_entries_skipped = 0; int32_t total_entries_scanned = 0; @@ -8807,6 +8677,7 @@ H5C_make_space_in_cache(H5F_t * f, size_t empty_space; hbool_t prev_is_dirty = FALSE; hbool_t didnt_flush_entry = FALSE; + hbool_t restart_scan; H5C_cache_entry_t * entry_ptr; H5C_cache_entry_t * prev_ptr; H5C_cache_entry_t * next_ptr; @@ -8817,13 +8688,12 @@ H5C_make_space_in_cache(H5F_t * f, HDassert( f ); HDassert( cache_ptr ); HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); - HDassert( first_flush_ptr != NULL ); - HDassert( ( *first_flush_ptr == TRUE ) || ( *first_flush_ptr == FALSE ) ); HDassert( cache_ptr->index_size == (cache_ptr->clean_index_size + cache_ptr->dirty_index_size) ); if ( write_permitted ) { + restart_scan = FALSE; initial_list_len = cache_ptr->LRU_list_len; entry_ptr = cache_ptr->LRU_tail_ptr; @@ -8854,6 +8724,7 @@ H5C_make_space_in_cache(H5F_t * f, ( entry_ptr != NULL ) ) { + HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); HDassert( ! (entry_ptr->is_protected) ); HDassert( ! (entry_ptr->is_read_only) ); HDassert( (entry_ptr->ro_ref_count) == 0 ); @@ -8866,7 +8737,8 @@ H5C_make_space_in_cache(H5F_t * f, prev_is_dirty = prev_ptr->is_dirty; } - if ( (entry_ptr->type)->id != H5C__EPOCH_MARKER_TYPE ) { + if ( ( (entry_ptr->type)->id != H5C__EPOCH_MARKER_TYPE ) && + ( ! entry_ptr->flush_in_progress ) ) { didnt_flush_entry = FALSE; @@ -8881,14 +8753,25 @@ H5C_make_space_in_cache(H5F_t * f, } #endif /* H5C_COLLECT_CACHE_STATS */ - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__NO_FLAGS_SET, - first_flush_ptr, - FALSE); + /* reset entries_removed_counter and + * last_entry_removed_ptr prior to the call to + * H5C__flush_single_entry() so that we can spot + * unexpected removals of entries from the cache, + * and set the restart_scan flag if proceeding + * would be likely to cause us to scan an entry + * that is no longer in the cache. + */ + cache_ptr->entries_removed_counter = 0; + cache_ptr->last_entry_removed_ptr = NULL; + + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__NO_FLAGS_SET, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") + + if ( ( cache_ptr->entries_removed_counter > 1 ) || + ( cache_ptr->last_entry_removed_ptr == prev_ptr ) ) + + restart_scan = TRUE; + } else if ( (cache_ptr->index_size + space_needed) > cache_ptr->max_cache_size ) { @@ -8896,26 +8779,15 @@ H5C_make_space_in_cache(H5F_t * f, cache_ptr->entries_scanned_to_make_space++; #endif /* H5C_COLLECT_CACHE_STATS */ - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__FLUSH_INVALIDATE_FLAG, - first_flush_ptr, - TRUE); - } else { + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") - /* We have enough space so don't flush clean entry. - * Set result to SUCCEED to avoid triggering the error - * code below. - */ + } else { + /* We have enough space so don't flush clean entry. */ #if H5C_COLLECT_CACHE_STATS clean_entries_skipped++; #endif /* H5C_COLLECT_CACHE_STATS */ didnt_flush_entry = TRUE; - result = SUCCEED; - } #if H5C_COLLECT_CACHE_STATS @@ -8924,40 +8796,27 @@ H5C_make_space_in_cache(H5F_t * f, } else { - /* Skip epoch markers. Set result to SUCCEED to avoid - * triggering the error code below. + /* Skip epoch markers and entries that are in the process + * of being flushed. */ didnt_flush_entry = TRUE; - result = SUCCEED; - } - - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "unable to flush entry") } if ( prev_ptr != NULL ) { -#ifndef NDEBUG - if ( prev_ptr->magic != H5C__H5C_CACHE_ENTRY_T_MAGIC ) { - - /* something horrible has happened to *prev_ptr -- - * scream and die. - */ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, \ - "*prev_ptr corrupt 1") - } -#endif /* NDEBUG */ if ( didnt_flush_entry ) { - /* epoch markers don't get flushed, so the sanity checks - * on normal entries will fail -- thus just set entry_ptr - * to prev_ptr and go on. + /* epoch markers don't get flushed, and we don't touch + * entries that are in the process of being flushed. + * Hence no need for sanity checks, as we haven't + * flushed anything. Thus just set entry_ptr to prev_ptr + * and go on. */ entry_ptr = prev_ptr; - } else if ( ( prev_ptr->is_dirty != prev_is_dirty ) + } else if ( ( restart_scan ) + || + ( prev_ptr->is_dirty != prev_is_dirty ) || ( prev_ptr->next != next_ptr ) || @@ -8968,7 +8827,9 @@ H5C_make_space_in_cache(H5F_t * f, /* something has happened to the LRU -- start over * from the tail. */ + restart_scan = FALSE; entry_ptr = cache_ptr->LRU_tail_ptr; + H5C__UPDATE_STATS_FOR_LRU_SCAN_RESTART(cache_ptr) } else { @@ -9055,20 +8916,14 @@ H5C_make_space_in_cache(H5F_t * f, prev_ptr = entry_ptr->aux_prev; - result = H5C_flush_single_entry(f, - primary_dxpl_id, - secondary_dxpl_id, - entry_ptr->type, - entry_ptr->addr, - H5C__FLUSH_INVALIDATE_FLAG, - first_flush_ptr, - TRUE); + if(H5C__flush_single_entry(f, dxpl_id, entry_ptr, H5C__FLUSH_INVALIDATE_FLAG | H5C__DEL_FROM_SLIST_ON_DESTROY_FLAG, NULL) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush entry") - if ( result < 0 ) { - - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ - "unable to flush entry") - } + /* we are scanning the clean LRU, so the serialize function + * will not be called on any entry -- thus there is no + * concern about the list being modified out from under + * this function. + */ entry_ptr = prev_ptr; entries_examined++; @@ -9482,6 +9337,66 @@ done: /*------------------------------------------------------------------------- * + * Function: H5C_entry_in_skip_list + * + * Purpose: Debugging function that scans skip list to see if it + * is in present. We need this, as it is possible for + * an entry to be in the skip list twice. + * + * Return: FALSE if the entry is not in the skip list, and TRUE + * if it is. + * + * Programmer: John Mainzer, 11/1/14 + * + * Changes: + * + * None. + * + *------------------------------------------------------------------------- + */ +#if H5C_DO_SLIST_SANITY_CHECKS + +static hbool_t +H5C_entry_in_skip_list(H5C_t * cache_ptr, H5C_cache_entry_t *target_ptr) +{ + hbool_t in_slist = FALSE; + H5SL_node_t * node_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; + + HDassert( cache_ptr ); + HDassert( cache_ptr->magic == H5C__H5C_T_MAGIC ); + HDassert( cache_ptr->slist_ptr ); + + node_ptr = H5SL_first(cache_ptr->slist_ptr); + + while ( ( node_ptr != NULL ) && ( ! in_slist ) ) + { + entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + + HDassert( entry_ptr ); + HDassert( entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC ); + HDassert( entry_ptr->is_dirty ); + HDassert( entry_ptr->in_slist ); + + if ( entry_ptr == target_ptr ) { + + in_slist = TRUE; + + } else { + + node_ptr = H5SL_next(node_ptr); + } + } + + return(in_slist); + +} /* H5C_entry_in_skip_list() */ + +#endif /* H5C_DO_SLIST_SANITY_CHECKS */ + + +/*------------------------------------------------------------------------- + * * Function: H5C_get_entry_ptr_from_addr() * * Purpose: Debugging function that attempts to look up an entry in the @@ -9784,7 +9699,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5C_flush_tagged_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, H5C_t * cache_ptr, haddr_t tag) +H5C_flush_tagged_entries(H5F_t * f, hid_t dxpl_id, H5C_t * cache_ptr, haddr_t tag) { herr_t ret_value = SUCCEED; @@ -9800,7 +9715,7 @@ H5C_flush_tagged_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't mark tagged entries") /* Flush all marked entries */ - if(H5C_flush_marked_entries(f, primary_dxpl_id, secondary_dxpl_id, cache_ptr) < 0) + if(H5C_flush_marked_entries(f, dxpl_id) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush marked entries") done: @@ -9873,7 +9788,7 @@ H5C_mark_tagged_entries(H5C_t * cache_ptr, haddr_t tag) *------------------------------------------------------------------------- */ static herr_t -H5C_flush_marked_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_id, H5C_t * cache_ptr) +H5C_flush_marked_entries(H5F_t * f, hid_t dxpl_id) { herr_t ret_value = SUCCEED; @@ -9881,12 +9796,10 @@ H5C_flush_marked_entries(H5F_t * f, hid_t primary_dxpl_id, hid_t secondary_dxpl_ /* Assertions */ HDassert(0); /* This function is not yet used. We shouldn't be in here yet. */ - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(f != NULL); /* Flush all marked entries */ - if(H5C_flush_cache(f, primary_dxpl_id, secondary_dxpl_id, - H5C__FLUSH_MARKED_ENTRIES_FLAG | H5C__FLUSH_IGNORE_PROTECTED_FLAG) < 0) + if(H5C_flush_cache(f, dxpl_id, H5C__FLUSH_MARKED_ENTRIES_FLAG | H5C__FLUSH_IGNORE_PROTECTED_FLAG) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "Can't flush cache") done: @@ -9929,7 +9842,7 @@ H5C_verify_tag(int id, haddr_t tag) * constraints are met. */ /* Superblock */ - if(id == H5AC_SUPERBLOCK_ID) { + if((id == H5AC_SUPERBLOCK_ID) || (id == H5AC_DRVRINFO_ID)) { if(tag != H5AC__SUPERBLOCK_TAG) HGOTO_ERROR(H5E_CACHE, H5E_CANTTAG, FAIL, "superblock not tagged with H5AC__SUPERBLOCK_TAG") } @@ -10001,10 +9914,9 @@ H5C_retag_copied_metadata(H5C_t * cache_ptr, haddr_t metadata_tag) next_entry_ptr = cache_ptr->index[u]; while(next_entry_ptr != NULL) { - if(cache_ptr->index[u] != NULL) { + if(cache_ptr->index[u] != NULL) if((cache_ptr->index[u])->tag == H5AC__COPIED_TAG) (cache_ptr->index[u])->tag = metadata_tag; - } /* end if */ next_entry_ptr = next_entry_ptr->ht_next; } /* end while */ |