Diffstat (limited to 'src/H5Cpkg.h')
-rw-r--r-- | src/H5Cpkg.h | 2052
1 file changed, 1019 insertions, 1033 deletions
diff --git a/src/H5Cpkg.h b/src/H5Cpkg.h index 7c278e8..5df84cd 100644 --- a/src/H5Cpkg.h +++ b/src/H5Cpkg.h @@ -35,14 +35,16 @@ #ifndef _H5Cpkg_H #define _H5Cpkg_H - /* Get package's private header */ #include "H5Cprivate.h" - -/* Get needed headers */ +/* Other private headers needed by this file */ #include "H5SLprivate.h" /* Skip lists */ +/**************************/ +/* Package Private Macros */ +/**************************/ + /* With the introduction of the fractal heap, it is now possible for * entries to be dirtied, resized, and/or moved in the flush callbacks. * As a result, on flushes, it may be necessary to make multiple passes @@ -53,980 +55,14 @@ * * -- JRM */ - #define H5C__MAX_PASSES_ON_FLUSH 4 - - -/**************************************************************************** - * - * structure H5C_t - * - * Catchall structure for all variables specific to an instance of the cache. - * - * While the individual fields of the structure are discussed below, the - * following overview may be helpful. - * - * Entries in the cache are stored in an instance of H5TB_TREE, indexed on - * the entry's disk address. While the H5TB_TREE is less efficient than - * hash table, it keeps the entries in address sorted order. As flushes - * in parallel mode are more efficient if they are issued in increasing - * address order, this is a significant benefit. Also the H5TB_TREE code - * was readily available, which reduced development time. - * - * While the cache was designed with multiple replacement policies in mind, - * at present only a modified form of LRU is supported. - * - * JRM - 4/26/04 - * - * Profiling has indicated that searches in the instance of H5TB_TREE are - * too expensive. To deal with this issue, I have augmented the cache - * with a hash table in which all entries will be stored. Given the - * advantages of flushing entries in increasing address order, the TBBT - * is retained, but only dirty entries are stored in it. At least for - * now, we will leave entries in the TBBT after they are flushed. - * - * Note that index_size and index_len now refer to the total size of - * and number of entries in the hash table. - * - * JRM - 7/19/04 - * - * The TBBT has since been replaced with a skip list. This change - * greatly predates this note. - * - * JRM - 9/26/05 - * - * magic: Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC. - * This field is used to validate pointers to instances of - * H5C_t. - * - * flush_in_progress: Boolean flag indicating whether a flush is in - * progress. - * - * trace_file_ptr: File pointer pointing to the trace file, which is used - * to record cache operations for use in simulations and design - * studies. This field will usually be NULL, indicating that - * no trace file should be recorded. - * - * Since much of the code supporting the parallel metadata - * cache is in H5AC, we don't write the trace file from - * H5C. Instead, H5AC reads the trace_file_ptr as needed. - * - * When we get to using H5C in other places, we may add - * code to write trace file data at the H5C level as well. - * - * aux_ptr: Pointer to void used to allow wrapper code to associate - * its data with an instance of H5C_t. The H5C cache code - * sets this field to NULL, and otherwise leaves it alone. - * - * max_type_id: Integer field containing the maximum type id number assigned - * to a type of entry in the cache. All type ids from 0 to - * max_type_id inclusive must be defined. 
The names of the - * types are stored in the type_name_table discussed below, and - * indexed by the ids. - * - * type_name_table_ptr: Pointer to an array of pointer to char of length - * max_type_id + 1. The strings pointed to by the entries - * in the array are the names of the entry types associated - * with the indexing type IDs. - * - * max_cache_size: Nominal maximum number of bytes that may be stored in the - * cache. This value should be viewed as a soft limit, as the - * cache can exceed this value under the following circumstances: - * - * a) All entries in the cache are protected, and the cache is - * asked to insert a new entry. In this case the new entry - * will be created. If this causes the cache to exceed - * max_cache_size, it will do so. The cache will attempt - * to reduce its size as entries are unprotected. - * - * b) When running in parallel mode, the cache may not be - * permitted to flush a dirty entry in response to a read. - * If there are no clean entries available to evict, the - * cache will exceed its maximum size. Again the cache - * will attempt to reduce its size to the max_cache_size - * limit on the next cache write. - * - * c) When an entry increases in size, the cache may exceed - * the max_cache_size limit until the next time the cache - * attempts to load or insert an entry. - * - * min_clean_size: Nominal minimum number of clean bytes in the cache. - * The cache attempts to maintain this number of bytes of - * clean data so as to avoid case b) above. Again, this is - * a soft limit. - * - * - * In addition to the call back functions required for each entry, the - * cache requires the following call back functions for this instance of - * the cache as a whole: - * - * check_write_permitted: In certain applications, the cache may not - * be allowed to write to disk at certain time. If specified, - * the check_write_permitted function is used to determine if - * a write is permissible at any given point in time. - * - * If no such function is specified (i.e. this field is NULL), - * the cache uses the following write_permitted field to - * determine whether writes are permitted. - * - * write_permitted: If check_write_permitted is NULL, this boolean flag - * indicates whether writes are permitted. - * - * log_flush: If provided, this function is called whenever a dirty - * entry is flushed to disk. - * - * - * In cases where memory is plentiful, and performance is an issue, it may - * be useful to disable all cache evictions, and thereby postpone metadata - * writes. The following field is used to implement this. - * - * evictions_enabled: Boolean flag that is initialized to TRUE. When - * this flag is set to FALSE, the metadata cache will not - * attempt to evict entries to make space for newly protected - * entries, and instead the will grow without limit. - * - * Needless to say, this feature must be used with care. - * - * - * The cache requires an index to facilitate searching for entries. The - * following fields support that index. - * - * index_len: Number of entries currently in the hash table used to index - * the cache. - * - * index_size: Number of bytes of cache entries currently stored in the - * hash table used to index the cache. - * - * This value should not be mistaken for footprint of the - * cache in memory. The average cache entry is small, and - * the cache has a considerable overhead. Multiplying the - * index_size by two should yield a conservative estimate - * of the cache's memory footprint. 
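An aside on the bookkeeping here: the next two field descriptions spell out the invariant clean_index_size + dirty_index_size == index_size. A minimal sanity-check sketch, in the HDassert style used throughout this header (cache_ptr is assumed to be a valid H5C_t pointer):

    /* Sketch: verify the index size split. The hash table macros
     * maintain index_size, while the clean/dirty split is updated
     * separately, so checking the sum catches bookkeeping drift. */
    HDassert(cache_ptr->index_size ==
             (cache_ptr->clean_index_size + cache_ptr->dirty_index_size));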
- * - * clean_index_size: Number of bytes of clean entries currently stored in - * the hash table. Note that the index_size field (above) - * is also the sum of the sizes of all entries in the cache. - * Thus we should have the invarient that clean_index_size + - * dirty_index_size == index_size. - * - * WARNING: - * - * 1) The clean_index_size field is not maintained by the - * index macros, as the hash table doesn't care whether - * the entry is clean or dirty. Instead the field is - * maintained in the H5C__UPDATE_RP macros. - * - * 2) The value of the clean_index_size must not be mistaken - * for the current clean size of the cache. Rather, the - * clean size of the cache is the current value of - * clean_index_size plus the amount of empty space (if any) - * in the cache. - * - * dirty_index_size: Number of bytes of dirty entries currently stored in - * the hash table. Note that the index_size field (above) - * is also the sum of the sizes of all entries in the cache. - * Thus we should have the invarient that clean_index_size + - * dirty_index_size == index_size. - * - * WARNING: - * - * 1) The dirty_index_size field is not maintained by the - * index macros, as the hash table doesn't care whether - * the entry is clean or dirty. Instead the field is - * maintained in the H5C__UPDATE_RP macros. - * - * index: Array of pointer to H5C_cache_entry_t of size - * H5C__HASH_TABLE_LEN. At present, this value is a power - * of two, not the usual prime number. - * - * I hope that the variable size of cache elements, the large - * hash table size, and the way in which HDF5 allocates space - * will combine to avoid problems with periodicity. If so, we - * can use a trivial hash function (a bit-and and a 3 bit left - * shift) with some small savings. - * - * If not, it will become evident in the statistics. Changing - * to the usual prime number length hash table will require - * changing the H5C__HASH_FCN macro and the deletion of the - * H5C__HASH_MASK #define. No other changes should be required. - * - * - * When we flush the cache, we need to write entries out in increasing - * address order. An instance of a skip list is used to store dirty entries in - * sorted order. Whether it is cheaper to sort the dirty entries as needed, - * or to maintain the list is an open question. At a guess, it depends - * on how frequently the cache is flushed. We will see how it goes. - * - * For now at least, I will not remove dirty entries from the list as they - * are flushed. (this has been changed -- dirty entries are now removed from - * the skip list as they are flushed. JRM - 10/25/05) - * - * slist_len: Number of entries currently in the skip list - * used to maintain a sorted list of dirty entries in the - * cache. - * - * slist_size: Number of bytes of cache entries currently stored in the - * skip list used to maintain a sorted list of - * dirty entries in the cache. - * - * slist_ptr: pointer to the instance of H5SL_t used maintain a sorted - * list of dirty entries in the cache. This sorted list has - * two uses: - * - * a) It allows us to flush dirty entries in increasing address - * order, which results in significant savings. - * - * b) It facilitates checking for adjacent dirty entries when - * attempting to evict entries from the cache. While we - * don't use this at present, I hope that this will allow - * some optimizations when I get to it. - * - * num_last_entries: The number of entries in the cache that can only be - * flushed after all other entries in the cache have - * been flushed. 
At this time, this will only ever be - * one entry (the superblock), and the code has been - * protected with HDasserts to enforce this. This restraint - * can certainly be relaxed in the future if the need for - * multiple entries being flushed last arises, though - * explicit tests for that case should be added when said - * HDasserts are removed. - * - * With the addition of the fractal heap, the cache must now deal with - * the case in which entries may be dirtied, moved, or have their sizes - * changed during a flush. To allow sanity checks in this situation, the - * following two fields have been added. They are only compiled in when - * H5C_DO_SANITY_CHECKS is TRUE. - * - * slist_len_increase: Number of entries that have been added to the - * slist since the last time this field was set to zero. - * - * slist_size_increase: Total size of all entries that have been added - * to the slist since the last time this field was set to - * zero. - * - * - * When a cache entry is protected, it must be removed from the LRU - * list(s) as it cannot be either flushed or evicted until it is unprotected. - * The following fields are used to implement the protected list (pl). - * - * pl_len: Number of entries currently residing on the protected list. - * - * pl_size: Number of bytes of cache entries currently residing on the - * protected list. - * - * pl_head_ptr: Pointer to the head of the doubly linked list of protected - * entries. Note that cache entries on this list are linked - * by their next and prev fields. - * - * This field is NULL if the list is empty. - * - * pl_tail_ptr: Pointer to the tail of the doubly linked list of protected - * entries. Note that cache entries on this list are linked - * by their next and prev fields. - * - * This field is NULL if the list is empty. - * - * - * For very frequently used entries, the protect/unprotect overhead can - * become burdensome. To avoid this overhead, I have modified the cache - * to allow entries to be "pinned". A pinned entry is similar to a - * protected entry, in the sense that it cannot be evicted, and that - * the entry can be modified at any time. - * - * Pinning an entry has the following implications: - * - * 1) A pinned entry cannot be evicted. Thus unprotected - * pinned entries reside in the pinned entry list, instead - * of the LRU list(s) (or other lists maintained by the current - * replacement policy code). - * - * 2) A pinned entry can be accessed or modified at any time. - * Therefore, the cache must check with the entry owner - * before flushing it. If permission is denied, the - * cache just skips the entry in the flush. - * - * 3) A pinned entry can be marked as dirty (and possibly - * change size) while it is unprotected. - * - * 4) The flush-destroy code must allow pinned entries to - * be unpinned (and possibly unprotected) during the - * flush. - * - * Since pinned entries cannot be evicted, they must be kept on a pinned - * entry list (pel), instead of being entrusted to the replacement policy - * code. - * - * Maintaining the pinned entry list requires the following fields: - * - * pel_len: Number of entries currently residing on the pinned - * entry list. - * - * pel_size: Number of bytes of cache entries currently residing on - * the pinned entry list. - * - * pel_head_ptr: Pointer to the head of the doubly linked list of pinned - * but not protected entries. Note that cache entries on - * this list are linked by their next and prev fields. - * - * This field is NULL if the list is empty. 
- * - * pel_tail_ptr: Pointer to the tail of the doubly linked list of pinned - * but not protected entries. Note that cache entries on - * this list are linked by their next and prev fields. - * - * This field is NULL if the list is empty. - * - * - * The cache must have a replacement policy, and the fields supporting this - * policy must be accessible from this structure. - * - * While there has been interest in several replacement policies for - * this cache, the initial development schedule is tight. Thus I have - * elected to support only a modified LRU (least recently used) policy - * for the first cut. - * - * To further simplify matters, I have simply included the fields needed - * by the modified LRU in this structure. When and if we add support for - * other policies, it will probably be easiest to just add the necessary - * fields to this structure as well -- we only create one instance of this - * structure per file, so the overhead is not excessive. - * - * - * Fields supporting the modified LRU policy: - * - * See most any OS text for a discussion of the LRU replacement policy. - * - * When operating in parallel mode, we must ensure that a read does not - * cause a write. If it does, the process will hang, as the write will - * be collective and the other processes will not know to participate. - * - * To deal with this issue, I have modified the usual LRU policy by adding - * clean and dirty LRU lists to the usual LRU list. - * - * The clean LRU list is simply the regular LRU list with all dirty cache - * entries removed. - * - * Similarly, the dirty LRU list is the regular LRU list with all the clean - * cache entries removed. - * - * When reading in parallel mode, we evict from the clean LRU list only. - * This implies that we must try to ensure that the clean LRU list is - * reasonably well stocked at all times. - * - * We attempt to do this by trying to flush enough entries on each write - * to keep the cLRU_list_size >= min_clean_size. - * - * Even if we start with a completely clean cache, a sequence of protects - * without unprotects can empty the clean LRU list. In this case, the - * cache must grow temporarily. At the next write, we will attempt to - * evict enough entries to reduce index_size to less than max_cache_size. - * While this will usually be possible, all bets are off if enough entries - * are protected. - * - * Discussions of the individual fields used by the modified LRU replacement - * policy follow: - * - * LRU_list_len: Number of cache entries currently on the LRU list. - * - * Observe that LRU_list_len + pl_len must always equal - * index_len. - * - * LRU_list_size: Number of bytes of cache entries currently residing on the - * LRU list. - * - * Observe that LRU_list_size + pl_size must always equal - * index_size. - * - * LRU_head_ptr: Pointer to the head of the doubly linked LRU list. Cache - * entries on this list are linked by their next and prev fields. - * - * This field is NULL if the list is empty. - * - * LRU_tail_ptr: Pointer to the tail of the doubly linked LRU list. Cache - * entries on this list are linked by their next and prev fields. - * - * This field is NULL if the list is empty. - * - * cLRU_list_len: Number of cache entries currently on the clean LRU list. - * - * Observe that cLRU_list_len + dLRU_list_len must always - * equal LRU_list_len. - * - * cLRU_list_size: Number of bytes of cache entries currently residing on - * the clean LRU list. 
- * - * Observe that cLRU_list_size + dLRU_list_size must always - * equal LRU_list_size. - * - * cLRU_head_ptr: Pointer to the head of the doubly linked clean LRU list. - * Cache entries on this list are linked by their aux_next and - * aux_prev fields. - * - * This field is NULL if the list is empty. - * - * cLRU_tail_ptr: Pointer to the tail of the doubly linked clean LRU list. - * Cache entries on this list are linked by their aux_next and - * aux_prev fields. - * - * This field is NULL if the list is empty. - * - * dLRU_list_len: Number of cache entries currently on the dirty LRU list. - * - * Observe that cLRU_list_len + dLRU_list_len must always - * equal LRU_list_len. - * - * dLRU_list_size: Number of cache entries currently on the dirty LRU list. - * - * Observe that cLRU_list_len + dLRU_list_len must always - * equal LRU_list_len. - * - * dLRU_head_ptr: Pointer to the head of the doubly linked dirty LRU list. - * Cache entries on this list are linked by their aux_next and - * aux_prev fields. - * - * This field is NULL if the list is empty. - * - * dLRU_tail_ptr: Pointer to the tail of the doubly linked dirty LRU list. - * Cache entries on this list are linked by their aux_next and - * aux_prev fields. - * - * This field is NULL if the list is empty. - * - * - * Automatic cache size adjustment: - * - * While the default cache size is adequate for most cases, we can run into - * cases where the default is too small. Ideally, we will let the user - * adjust the cache size as required. However, this is not possible in all - * cases. Thus I have added automatic cache size adjustment code. - * - * The configuration for the automatic cache size adjustment is stored in - * the structure described below: - * - * size_increase_possible: Depending on the configuration data given - * in the resize_ctl field, it may or may not be possible - * to increase the size of the cache. Rather than test for - * all the ways this can happen, we simply set this flag when - * we receive a new configuration. - * - * flash_size_increase_possible: Depending on the configuration data given - * in the resize_ctl field, it may or may not be possible - * for a flash size increase to occur. We set this flag - * whenever we receive a new configuration so as to avoid - * repeated calculations. - * - * flash_size_increase_threshold: If a flash cache size increase is possible, - * this field is used to store the minimum size of a new entry - * or size increase needed to trigger a flash cache size - * increase. Note that this field must be updated whenever - * the size of the cache is changed. - * - * size_decrease_possible: Depending on the configuration data given - * in the resize_ctl field, it may or may not be possible - * to decrease the size of the cache. Rather than test for - * all the ways this can happen, we simply set this flag when - * we receive a new configuration. - * - * cache_full: Boolean flag used to keep track of whether the cache is - * full, so we can refrain from increasing the size of a - * cache which hasn't used up the space allotted to it. - * - * The field is initialized to FALSE, and then set to TRUE - * whenever we attempt to make space in the cache. - * - * resize_enabled: This is another convenience flag which is set whenever - * a new set of values for resize_ctl are provided. Very - * simply, - * - * resize_enabled = size_increase_possible || - * size_decrease_possible; - * - * size_decreased: Boolean flag set to TRUE whenever the maximum cache - * size is decreased. 
The flag triggers a call to - * H5C_make_space_in_cache() on the next call to H5C_protect(). - * - * resize_ctl: Instance of H5C_auto_size_ctl_t containing configuration - * data for automatic cache resizing. - * - * epoch_markers_active: Integer field containing the number of epoch - * markers currently in use in the LRU list. This value - * must be in the range [0, H5C__MAX_EPOCH_MARKERS - 1]. - * - * epoch_marker_active: Array of boolean of length H5C__MAX_EPOCH_MARKERS. - * This array is used to track which epoch markers are currently - * in use. - * - * epoch_marker_ringbuf: Array of int of length H5C__MAX_EPOCH_MARKERS + 1. - * - * To manage the epoch marker cache entries, it is necessary - * to track their order in the LRU list. This is done with - * epoch_marker_ringbuf. When markers are inserted at the - * head of the LRU list, the index of the marker in the - * epoch_markers array is inserted at the tail of the ring - * buffer. When it becomes the epoch_marker_active'th marker - * in the LRU list, it will have worked its way to the head - * of the ring buffer as well. This allows us to remove it - * without scanning the LRU list if such is required. - * - * epoch_marker_ringbuf_first: Integer field containing the index of the - * first entry in the ring buffer. - * - * epoch_marker_ringbuf_last: Integer field containing the index of the - * last entry in the ring buffer. - * - * epoch_marker_ringbuf_size: Integer field containing the number of entries - * in the ring buffer. - * - * epoch_markers: Array of instances of H5C_cache_entry_t of length - * H5C__MAX_EPOCH_MARKERS. The entries are used as markers - * in the LRU list to identify cache entries that haven't - * been accessed for some (small) specified number of - * epochs. These entries (if any) can then be evicted and - * the cache size reduced -- ideally without evicting any - * of the current working set. Needless to say, the epoch - * length and the number of epochs before an unused entry - * must be chosen so that all, or almost all, the working - * set will be accessed before the limit. - * - * Epoch markers only appear in the LRU list, never in - * the index or slist. While they are of type - * H5C__EPOCH_MARKER_TYPE, and have associated class - * functions, these functions should never be called. - * - * The addr fields of these instances of H5C_cache_entry_t - * are set to the index of the instance in the epoch_markers - * array, the size is set to 0, and the type field points - * to the constant structure epoch_marker_class defined - * in H5C.c. The next and prev fields are used as usual - * to link the entry into the LRU list. - * - * All other fields are unused. - * - * - * Cache hit rate collection fields: - * - * We supply the current cache hit rate on request, so we must keep a - * simple cache hit rate computation regardless of whether statistics - * collection is enabled. The following fields support this capability. - * - * cache_hits: Number of cache hits since the last time the cache hit - * rate statistics were reset. Note that when automatic cache - * re-sizing is enabled, this field will be reset every automatic - * resize epoch. - * - * cache_accesses: Number of times the cache has been accessed while - * since the last since the last time the cache hit rate statistics - * were reset. Note that when automatic cache re-sizing is enabled, - * this field will be reset every automatic resize epoch. 
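Given these two fields, the hit rate computation is a one-liner; a minimal sketch of what a query along the lines of H5C_get_cache_hit_rate() presumably computes (the zero-access guard is an assumption, not something this header specifies):

    /* Sketch: hit rate since the last statistics reset. */
    double hit_rate = 0.0;

    if(cache_ptr->cache_accesses > 0)
        hit_rate = (double)(cache_ptr->cache_hits) /
                   (double)(cache_ptr->cache_accesses);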
- * - * - * Statistics collection fields: - * - * When enabled, these fields are used to collect statistics as described - * below. The first set are collected only when H5C_COLLECT_CACHE_STATS - * is true. - * - * hits: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type id - * equal to the array index has been in cache when requested in - * the current epoch. - * - * misses: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type id - * equal to the array index has not been in cache when - * requested in the current epoch. - * - * write_protects: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The - * cells are used to record the number of times an entry with - * type id equal to the array index has been write protected - * in the current epoch. - * - * Observe that (hits + misses) = (write_protects + read_protects). - * - * read_protects: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The - * cells are used to record the number of times an entry with - * type id equal to the array index has been read protected in - * the current epoch. - * - * Observe that (hits + misses) = (write_protects + read_protects). - * - * max_read_protects: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to maximum number of simultaneous read - * protects on any entry with type id equal to the array index - * in the current epoch. - * - * insertions: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type - * id equal to the array index has been inserted into the - * cache in the current epoch. - * - * pinned_insertions: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to record the number of times an entry - * with type id equal to the array index has been inserted - * pinned into the cache in the current epoch. - * - * clears: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times a dirty entry with type - * id equal to the array index has been cleared in the current - * epoch. - * - * flushes: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type id - * equal to the array index has been written to disk in the - * current epoch. - * - * evictions: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type id - * equal to the array index has been evicted from the cache in - * the current epoch. - * - * moves: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type - * id equal to the array index has been moved in the current - * epoch. - * - * entry_flush_moves: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to record the number of times an entry - * with type id equal to the array index has been moved - * during its flush callback in the current epoch. - * - * cache_flush_moves: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to record the number of times an entry - * with type id equal to the array index has been moved - * during a cache flush in the current epoch. - * - * pins: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. 
The cells - * are used to record the number of times an entry with type - * id equal to the array index has been pinned in the current - * epoch. - * - * unpins: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type - * id equal to the array index has been unpinned in the current - * epoch. - * - * dirty_pins: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the number of times an entry with type - * id equal to the array index has been marked dirty while pinned - * in the current epoch. - * - * pinned_flushes: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The - * cells are used to record the number of times an entry - * with type id equal to the array index has been flushed while - * pinned in the current epoch. - * - * pinned_clears: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. The - * cells are used to record the number of times an entry - * with type id equal to the array index has been cleared while - * pinned in the current epoch. - * - * size_increases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to record the number of times an entry - * with type id equal to the array index has increased in - * size in the current epoch. - * - * size_decreases: Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1. - * The cells are used to record the number of times an entry - * with type id equal to the array index has decreased in - * size in the current epoch. - * - * entry_flush_size_changes: Array of int64 of length - * H5C__MAX_NUM_TYPE_IDS + 1. The cells are used to record - * the number of times an entry with type id equal to the - * array index has changed size while in its flush callback. - * - * cache_flush_size_changes: Array of int64 of length - * H5C__MAX_NUM_TYPE_IDS + 1. The cells are used to record - * the number of times an entry with type id equal to the - * array index has changed size during a cache flush - * - * total_ht_insertions: Number of times entries have been inserted into the - * hash table in the current epoch. - * - * total_ht_deletions: Number of times entries have been deleted from the - * hash table in the current epoch. - * - * successful_ht_searches: int64 containing the total number of successful - * searches of the hash table in the current epoch. - * - * total_successful_ht_search_depth: int64 containing the total number of - * entries other than the targets examined in successful - * searches of the hash table in the current epoch. - * - * failed_ht_searches: int64 containing the total number of unsuccessful - * searches of the hash table in the current epoch. - * - * total_failed_ht_search_depth: int64 containing the total number of - * entries examined in unsuccessful searches of the hash - * table in the current epoch. - * - * max_index_len: Largest value attained by the index_len field in the - * current epoch. - * - * max_index_size: Largest value attained by the index_size field in the - * current epoch. - * - * max_clean_index_size: Largest value attained by the clean_index_size field - * in the current epoch. - * - * max_dirty_index_size: Largest value attained by the dirty_index_size field - * in the current epoch. - * - * max_slist_len: Largest value attained by the slist_len field in the - * current epoch. - * - * max_slist_size: Largest value attained by the slist_size field in the - * current epoch. - * - * max_pl_len: Largest value attained by the pl_len field in the - * current epoch. 
- * - * max_pl_size: Largest value attained by the pl_size field in the - * current epoch. - * - * max_pel_len: Largest value attained by the pel_len field in the - * current epoch. - * - * max_pel_size: Largest value attained by the pel_size field in the - * current epoch. - * - * calls_to_msic: Total number of calls to H5C_make_space_in_cache - * - * total_entries_skipped_in_msic: Number of clean entries skipped while - * enforcing the min_clean_fraction in H5C_make_space_in_cache(). - * - * total_entries_scanned_in_msic: Number of clean entries skipped while - * enforcing the min_clean_fraction in H5C_make_space_in_cache(). - * - * max_entries_skipped_in_msic: Maximum number of clean entries skipped - * in any one call to H5C_make_space_in_cache(). - * - * max_entries_scanned_in_msic: Maximum number of entries scanned over - * in any one call to H5C_make_space_in_cache(). - * - * entries_scanned_to_make_space: Number of entries scanned only when looking - * for entries to evict in order to make space in cache. - - * The remaining stats are collected only when both H5C_COLLECT_CACHE_STATS - * and H5C_COLLECT_CACHE_ENTRY_STATS are true. - * - * max_accesses: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the maximum number of times any single - * entry with type id equal to the array index has been - * accessed in the current epoch. - * - * min_accesses: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the minimum number of times any single - * entry with type id equal to the array index has been - * accessed in the current epoch. - * - * max_clears: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the maximum number of times any single - * entry with type id equal to the array index has been cleared - * in the current epoch. - * - * max_flushes: Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the maximum number of times any single - * entry with type id equal to the array index has been - * flushed in the current epoch. - * - * max_size: Array of size_t of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the maximum size of any single entry - * with type id equal to the array index that has resided in - * the cache in the current epoch. - * - * max_pins: Array of size_t of length H5C__MAX_NUM_TYPE_IDS + 1. The cells - * are used to record the maximum number of times that any single - * entry with type id equal to the array index that has been - * marked as pinned in the cache in the current epoch. - * - * - * Fields supporting testing: - * - * prefix Array of char used to prefix debugging output. The - * field is intended to allow marking of output of with - * the processes mpi rank. - * - * get_entry_ptr_from_addr_counter: Counter used to track the number of - * times the H5C_get_entry_ptr_from_addr() function has been - * called successfully. This field is only defined when - * NDEBUG is not #defined. 
- * - ****************************************************************************/ - +/* Cache configuration settings */ #define H5C__HASH_TABLE_LEN (64 * 1024) /* must be a power of 2 */ - #define H5C__H5C_T_MAGIC 0x005CAC0E #define H5C__MAX_NUM_TYPE_IDS 27 #define H5C__PREFIX_LEN 32 -struct H5C_t -{ - uint32_t magic; - - hbool_t flush_in_progress; - - FILE * trace_file_ptr; - - void * aux_ptr; - - int32_t max_type_id; - const char * (* type_name_table_ptr); - - size_t max_cache_size; - size_t min_clean_size; - - H5C_write_permitted_func_t check_write_permitted; - hbool_t write_permitted; - - H5C_log_flush_func_t log_flush; - - hbool_t evictions_enabled; - - int32_t index_len; - size_t index_size; - size_t clean_index_size; - size_t dirty_index_size; - H5C_cache_entry_t * (index[H5C__HASH_TABLE_LEN]); - - hbool_t ignore_tags; - - int32_t slist_len; - size_t slist_size; - H5SL_t * slist_ptr; - int32_t num_last_entries; -#if H5C_DO_SANITY_CHECKS - int64_t slist_len_increase; - int64_t slist_size_increase; -#endif /* H5C_DO_SANITY_CHECKS */ - - int32_t pl_len; - size_t pl_size; - H5C_cache_entry_t * pl_head_ptr; - H5C_cache_entry_t * pl_tail_ptr; - - int32_t pel_len; - size_t pel_size; - H5C_cache_entry_t * pel_head_ptr; - H5C_cache_entry_t * pel_tail_ptr; - - int32_t LRU_list_len; - size_t LRU_list_size; - H5C_cache_entry_t * LRU_head_ptr; - H5C_cache_entry_t * LRU_tail_ptr; - - int32_t cLRU_list_len; - size_t cLRU_list_size; - H5C_cache_entry_t * cLRU_head_ptr; - H5C_cache_entry_t * cLRU_tail_ptr; - - int32_t dLRU_list_len; - size_t dLRU_list_size; - H5C_cache_entry_t * dLRU_head_ptr; - H5C_cache_entry_t * dLRU_tail_ptr; - - hbool_t size_increase_possible; - hbool_t flash_size_increase_possible; - size_t flash_size_increase_threshold; - hbool_t size_decrease_possible; - hbool_t resize_enabled; - hbool_t cache_full; - hbool_t size_decreased; - H5C_auto_size_ctl_t resize_ctl; - - int32_t epoch_markers_active; - hbool_t epoch_marker_active[H5C__MAX_EPOCH_MARKERS]; - int32_t epoch_marker_ringbuf[H5C__MAX_EPOCH_MARKERS+1]; - int32_t epoch_marker_ringbuf_first; - int32_t epoch_marker_ringbuf_last; - int32_t epoch_marker_ringbuf_size; - H5C_cache_entry_t epoch_markers[H5C__MAX_EPOCH_MARKERS]; - - int64_t cache_hits; - int64_t cache_accesses; - -#if H5C_COLLECT_CACHE_STATS - - /* stats fields */ - int64_t hits[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t misses[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t write_protects[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t read_protects[H5C__MAX_NUM_TYPE_IDS + 1]; - int32_t max_read_protects[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t insertions[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t pinned_insertions[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t clears[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t flushes[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t evictions[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t moves[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t entry_flush_moves[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t cache_flush_moves[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t pins[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t unpins[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t dirty_pins[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t pinned_flushes[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t pinned_clears[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t size_increases[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t size_decreases[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t entry_flush_size_changes[H5C__MAX_NUM_TYPE_IDS + 1]; - int64_t cache_flush_size_changes[H5C__MAX_NUM_TYPE_IDS + 1]; - - int64_t total_ht_insertions; - int64_t total_ht_deletions; - int64_t successful_ht_searches; - 
int64_t total_successful_ht_search_depth; - int64_t failed_ht_searches; - int64_t total_failed_ht_search_depth; - - int32_t max_index_len; - size_t max_index_size; - size_t max_clean_index_size; - size_t max_dirty_index_size; - - int32_t max_slist_len; - size_t max_slist_size; - - int32_t max_pl_len; - size_t max_pl_size; - - int32_t max_pel_len; - size_t max_pel_size; - - int64_t calls_to_msic; - int64_t total_entries_skipped_in_msic; - int64_t total_entries_scanned_in_msic; - int32_t max_entries_skipped_in_msic; - int32_t max_entries_scanned_in_msic; - int64_t entries_scanned_to_make_space; - -#if H5C_COLLECT_CACHE_ENTRY_STATS - - int32_t max_accesses[H5C__MAX_NUM_TYPE_IDS + 1]; - int32_t min_accesses[H5C__MAX_NUM_TYPE_IDS + 1]; - int32_t max_clears[H5C__MAX_NUM_TYPE_IDS + 1]; - int32_t max_flushes[H5C__MAX_NUM_TYPE_IDS + 1]; - size_t max_size[H5C__MAX_NUM_TYPE_IDS + 1]; - int32_t max_pins[H5C__MAX_NUM_TYPE_IDS + 1]; - -#endif /* H5C_COLLECT_CACHE_ENTRY_STATS */ - -#endif /* H5C_COLLECT_CACHE_STATS */ - - char prefix[H5C__PREFIX_LEN]; - -#ifndef NDEBUG - - int64_t get_entry_ptr_from_addr_counter; - -#endif /* NDEBUG */ -}; - - -/****************************************************************************/ -/***************************** Macro Definitions ****************************/ -/****************************************************************************/ - /**************************************************************************** * * We maintain doubly linked lists of instances of H5C_cache_entry_t for a @@ -1055,11 +91,11 @@ struct H5C_t * * from the H5C__DLL_PRE_REMOVE_SC macro. With the addition of the * epoch markers used in the age out based cache size reduction algorithm, - * this invarient need not hold, as the epoch markers are of size 0. + * this invariant need not hold, as the epoch markers are of size 0. * * One could argue that I should have given the epoch markers a positive * size, but this would break the index_size = LRU_list_size + pl_size - * + pel_size invarient. + * + pel_size invariant. * * Alternatively, I could pass the current decr_mode in to the macro, * and just skip the check whenever epoch markers may be in use. @@ -1615,7 +651,6 @@ if ( ( (entry_ptr) == NULL ) || \ ((cache_ptr)->max_size)[(entry_ptr)->type->id] ) \ ((cache_ptr)->max_size)[(entry_ptr)->type->id] \ = (entry_ptr)->size; \ - } \ } #define H5C__UPDATE_STATS_FOR_PROTECT(cache_ptr, entry_ptr, hit) \ @@ -1781,21 +816,21 @@ if ( ( (entry_ptr) == NULL ) || \ #if H5C_DO_SANITY_CHECKS -#define H5C__PRE_HT_INSERT_SC(cache_ptr, entry_ptr, fail_val) \ -if ( ( (cache_ptr) == NULL ) || \ - ( (cache_ptr)->magic != H5C__H5C_T_MAGIC ) || \ - ( (entry_ptr) == NULL ) || \ - ( ! H5F_addr_defined((entry_ptr)->addr) ) || \ - ( (entry_ptr)->ht_next != NULL ) || \ - ( (entry_ptr)->ht_prev != NULL ) || \ - ( (entry_ptr)->size <= 0 ) || \ - ( (k = H5C__HASH_FCN((entry_ptr)->addr)) < 0 ) || \ - ( k >= H5C__HASH_TABLE_LEN ) || \ - ( (cache_ptr)->index_size != \ - ((cache_ptr)->clean_index_size + \ - (cache_ptr)->dirty_index_size) ) ) { \ - HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ - "Pre HT insert SC failed") \ +#define H5C__PRE_HT_INSERT_SC(cache_ptr, entry_ptr, fail_val) \ +if ( ( (cache_ptr) == NULL ) || \ + ( (cache_ptr)->magic != H5C__H5C_T_MAGIC ) || \ + ( (entry_ptr) == NULL ) || \ + ( ! 
H5F_addr_defined((entry_ptr)->addr) ) || \ + ( (entry_ptr)->ht_next != NULL ) || \ + ( (entry_ptr)->ht_prev != NULL ) || \ + ( (entry_ptr)->size <= 0 ) || \ + ( H5C__HASH_FCN((entry_ptr)->addr) < 0 ) || \ + ( H5C__HASH_FCN((entry_ptr)->addr) >= H5C__HASH_TABLE_LEN ) || \ + ( (cache_ptr)->index_size != \ + ((cache_ptr)->clean_index_size + \ + (cache_ptr)->dirty_index_size) ) ) { \ + HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \ + "Pre HT insert SC failed") \ } #define H5C__PRE_HT_REMOVE_SC(cache_ptr, entry_ptr) \ @@ -1880,7 +915,6 @@ if ( ( (cache_ptr) == NULL ) || \ ( (cache_ptr)->index_size != \ ((cache_ptr)->clean_index_size + \ (cache_ptr)->dirty_index_size) ) || \ - ( (entry_ptr == NULL) ) || \ ( ( !( was_clean ) || \ ( (cache_ptr)->clean_index_size < (old_size) ) ) && \ ( ( (was_clean) ) || \ @@ -1978,22 +1012,18 @@ if ( (cache_ptr)->index_size != \ H5C__PRE_HT_INSERT_SC(cache_ptr, entry_ptr, fail_val) \ k = H5C__HASH_FCN((entry_ptr)->addr); \ if ( ((cache_ptr)->index)[k] == NULL ) \ - { \ ((cache_ptr)->index)[k] = (entry_ptr); \ - } \ - else \ - { \ + else { \ (entry_ptr)->ht_next = ((cache_ptr)->index)[k]; \ (entry_ptr)->ht_next->ht_prev = (entry_ptr); \ ((cache_ptr)->index)[k] = (entry_ptr); \ } \ (cache_ptr)->index_len++; \ (cache_ptr)->index_size += (entry_ptr)->size; \ - if ( (entry_ptr)->is_dirty ) { \ + if ( (entry_ptr)->is_dirty ) \ (cache_ptr)->dirty_index_size += (entry_ptr)->size; \ - } else { \ + else \ (cache_ptr)->clean_index_size += (entry_ptr)->size; \ - } \ if ((entry_ptr)->flush_me_last) { \ (cache_ptr)->num_last_entries++; \ HDassert((cache_ptr)->num_last_entries == 1); \ @@ -2007,26 +1037,19 @@ if ( (cache_ptr)->index_size != \ H5C__PRE_HT_REMOVE_SC(cache_ptr, entry_ptr) \ k = H5C__HASH_FCN((entry_ptr)->addr); \ if ( (entry_ptr)->ht_next ) \ - { \ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \ - } \ if ( (entry_ptr)->ht_prev ) \ - { \ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \ - } \ if ( ((cache_ptr)->index)[k] == (entry_ptr) ) \ - { \ ((cache_ptr)->index)[k] = (entry_ptr)->ht_next; \ - } \ (entry_ptr)->ht_next = NULL; \ (entry_ptr)->ht_prev = NULL; \ (cache_ptr)->index_len--; \ (cache_ptr)->index_size -= (entry_ptr)->size; \ - if ( (entry_ptr)->is_dirty ) { \ + if ( (entry_ptr)->is_dirty ) \ (cache_ptr)->dirty_index_size -= (entry_ptr)->size; \ - } else { \ + else \ (cache_ptr)->clean_index_size -= (entry_ptr)->size; \ - } \ if ((entry_ptr)->flush_me_last) { \ (cache_ptr)->num_last_entries--; \ HDassert((cache_ptr)->num_last_entries == 0); \ @@ -2041,20 +1064,15 @@ if ( (cache_ptr)->index_size != \ H5C__PRE_HT_SEARCH_SC(cache_ptr, Addr, fail_val) \ k = H5C__HASH_FCN(Addr); \ entry_ptr = ((cache_ptr)->index)[k]; \ - while ( ( entry_ptr ) && ( H5F_addr_ne(Addr, (entry_ptr)->addr) ) ) \ - { \ + while ( ( entry_ptr ) && ( H5F_addr_ne(Addr, (entry_ptr)->addr) ) ) { \ (entry_ptr) = (entry_ptr)->ht_next; \ (depth)++; \ } \ - if ( entry_ptr ) \ - { \ + if ( entry_ptr ) { \ H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, Addr, k, fail_val) \ - if ( entry_ptr != ((cache_ptr)->index)[k] ) \ - { \ + if ( entry_ptr != ((cache_ptr)->index)[k] ) { \ if ( (entry_ptr)->ht_next ) \ - { \ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \ - } \ HDassert( (entry_ptr)->ht_prev != NULL ); \ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \ ((cache_ptr)->index)[k]->ht_prev = (entry_ptr); \ @@ -2074,20 +1092,15 @@ if ( (cache_ptr)->index_size != \ H5C__PRE_HT_SEARCH_SC(cache_ptr, Addr, fail_val) \ k = H5C__HASH_FCN(Addr); \ entry_ptr = 
((cache_ptr)->index)[k]; \ - while ( ( entry_ptr ) && ( H5F_addr_ne(Addr, (entry_ptr)->addr) ) ) \ - { \ + while ( ( entry_ptr ) && ( H5F_addr_ne(Addr, (entry_ptr)->addr) ) ) { \ (entry_ptr) = (entry_ptr)->ht_next; \ (depth)++; \ } \ - if ( entry_ptr ) \ - { \ + if ( entry_ptr ) { \ H5C__POST_SUC_HT_SEARCH_SC(cache_ptr, entry_ptr, Addr, k, fail_val) \ - if ( entry_ptr != ((cache_ptr)->index)[k] ) \ - { \ + if ( entry_ptr != ((cache_ptr)->index)[k] ) { \ if ( (entry_ptr)->ht_next ) \ - { \ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \ - } \ HDassert( (entry_ptr)->ht_prev != NULL ); \ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \ ((cache_ptr)->index)[k]->ht_prev = (entry_ptr); \ @@ -2122,16 +1135,14 @@ if ( (cache_ptr)->index_size != \ entry_ptr, was_clean) \ (cache_ptr)->index_size -= (old_size); \ (cache_ptr)->index_size += (new_size); \ - if ( was_clean ) { \ + if ( was_clean ) \ (cache_ptr)->clean_index_size -= (old_size); \ - } else { \ + else \ (cache_ptr)->dirty_index_size -= (old_size); \ - } \ - if ( (entry_ptr)->is_dirty ) { \ + if ( (entry_ptr)->is_dirty ) \ (cache_ptr)->dirty_index_size += (new_size); \ - } else { \ + else \ (cache_ptr)->clean_index_size += (new_size); \ - } \ H5C__POST_HT_ENTRY_SIZE_CHANGE_SC(cache_ptr, old_size, new_size, \ entry_ptr) \ } @@ -2208,8 +1219,7 @@ if ( (cache_ptr)->index_size != \ HDassert( H5F_addr_defined((entry_ptr)->addr) ); \ HDassert( !((entry_ptr)->in_slist) ); \ \ - if ( H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) \ - < 0 ) \ + if(H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) < 0) \ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, (fail_val), \ "Can't insert entry in skip list") \ \ @@ -2235,8 +1245,7 @@ if ( (cache_ptr)->index_size != \ HDassert( H5F_addr_defined((entry_ptr)->addr) ); \ HDassert( !((entry_ptr)->in_slist) ); \ \ - if ( H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) \ - < 0 ) \ + if(H5SL_insert((cache_ptr)->slist_ptr, entry_ptr, &(entry_ptr)->addr) < 0) \ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, (fail_val), \ "Can't insert entry in skip list") \ \ @@ -2297,8 +1306,7 @@ if ( (cache_ptr)->index_size != \ HDassert( (cache_ptr)->slist_ptr ); \ \ if ( H5SL_remove((cache_ptr)->slist_ptr, &(entry_ptr)->addr) \ - != (entry_ptr) ) \ - \ + != (entry_ptr) ) \ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, \ "Can't delete entry from skip list.") \ \ @@ -3601,5 +2609,983 @@ if ( (cache_ptr)->index_size != \ #endif /* H5C_MAINTAIN_CLEAN_AND_DIRTY_LRU_LISTS */ +/****************************/ +/* Package Private Typedefs */ +/****************************/ + +/**************************************************************************** + * + * structure H5C_t + * + * Catchall structure for all variables specific to an instance of the cache. + * + * While the individual fields of the structure are discussed below, the + * following overview may be helpful. + * + * Entries in the cache are stored in an instance of H5TB_TREE, indexed on + * the entry's disk address. While the H5TB_TREE is less efficient than + * hash table, it keeps the entries in address sorted order. As flushes + * in parallel mode are more efficient if they are issued in increasing + * address order, this is a significant benefit. Also the H5TB_TREE code + * was readily available, which reduced development time. + * + * While the cache was designed with multiple replacement policies in mind, + * at present only a modified form of LRU is supported. 
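Stepping outside the comment text for a moment: the H5C__SEARCH_INDEX macro reformatted in the hunks above implements a move-to-front search of a hash bucket's doubly linked chain. A plain-C sketch of the same idea (the enclosing function, k, and addr are hypothetical; ht_next/ht_prev are the chain fields this header documents):

    /* Sketch: find addr's entry in bucket k; on a hit anywhere past
     * the head, unlink the entry and relink it at the front so that
     * frequently searched entries stay cheap to reach. */
    int                k         = H5C__HASH_FCN(addr);
    H5C_cache_entry_t *entry_ptr = cache_ptr->index[k];

    while(entry_ptr && H5F_addr_ne(addr, entry_ptr->addr))
        entry_ptr = entry_ptr->ht_next;
    if(entry_ptr && entry_ptr != cache_ptr->index[k]) {
        if(entry_ptr->ht_next)
            entry_ptr->ht_next->ht_prev = entry_ptr->ht_prev;
        entry_ptr->ht_prev->ht_next = entry_ptr->ht_next; /* non-NULL: not the head */
        cache_ptr->index[k]->ht_prev = entry_ptr;
        entry_ptr->ht_next = cache_ptr->index[k];
        entry_ptr->ht_prev = NULL;
        cache_ptr->index[k] = entry_ptr;
    }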
+ * + * JRM - 4/26/04 + * + * Profiling has indicated that searches in the instance of H5TB_TREE are + * too expensive. To deal with this issue, I have augmented the cache + * with a hash table in which all entries will be stored. Given the + * advantages of flushing entries in increasing address order, the TBBT + * is retained, but only dirty entries are stored in it. At least for + * now, we will leave entries in the TBBT after they are flushed. + * + * Note that index_size and index_len now refer to the total size of + * and number of entries in the hash table. + * + * JRM - 7/19/04 + * + * The TBBT has since been replaced with a skip list. This change + * greatly predates this note. + * + * JRM - 9/26/05 + * + * magic: Unsigned 32 bit integer always set to H5C__H5C_T_MAGIC. + * This field is used to validate pointers to instances of + * H5C_t. + * + * flush_in_progress: Boolean flag indicating whether a flush is in + * progress. + * + * trace_file_ptr: File pointer pointing to the trace file, which is used + * to record cache operations for use in simulations and design + * studies. This field will usually be NULL, indicating that + * no trace file should be recorded. + * + * Since much of the code supporting the parallel metadata + * cache is in H5AC, we don't write the trace file from + * H5C. Instead, H5AC reads the trace_file_ptr as needed. + * + * When we get to using H5C in other places, we may add + * code to write trace file data at the H5C level as well. + * + * aux_ptr: Pointer to void used to allow wrapper code to associate + * its data with an instance of H5C_t. The H5C cache code + * sets this field to NULL, and otherwise leaves it alone. + * + * max_type_id: Integer field containing the maximum type id number assigned + * to a type of entry in the cache. All type ids from 0 to + * max_type_id inclusive must be defined. The names of the + * types are stored in the type_name_table discussed below, and + * indexed by the ids. + * + * type_name_table_ptr: Pointer to an array of pointer to char of length + * max_type_id + 1. The strings pointed to by the entries + * in the array are the names of the entry types associated + * with the indexing type IDs. + * + * max_cache_size: Nominal maximum number of bytes that may be stored in the + * cache. This value should be viewed as a soft limit, as the + * cache can exceed this value under the following circumstances: + * + * a) All entries in the cache are protected, and the cache is + * asked to insert a new entry. In this case the new entry + * will be created. If this causes the cache to exceed + * max_cache_size, it will do so. The cache will attempt + * to reduce its size as entries are unprotected. + * + * b) When running in parallel mode, the cache may not be + * permitted to flush a dirty entry in response to a read. + * If there are no clean entries available to evict, the + * cache will exceed its maximum size. Again the cache + * will attempt to reduce its size to the max_cache_size + * limit on the next cache write. + * + * c) When an entry increases in size, the cache may exceed + * the max_cache_size limit until the next time the cache + * attempts to load or insert an entry. + * + * min_clean_size: Nominal minimum number of clean bytes in the cache. + * The cache attempts to maintain this number of bytes of + * clean data so as to avoid case b) above. Again, this is + * a soft limit. 
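A hedged sketch of how the min_clean_size target might be checked on a write (hypothetical fragment; the real logic lives in H5C_make_space_in_cache(), and cLRU_list_size is documented further below):

    /* Sketch: dirty bytes that would need to be flushed to restore
     * the nominal clean floor. Both limits are soft, so a shortfall
     * is legal; the cache tries to correct it on later writes. */
    size_t shortfall = 0;

    if(cache_ptr->cLRU_list_size < cache_ptr->min_clean_size)
        shortfall = cache_ptr->min_clean_size - cache_ptr->cLRU_list_size;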
+ * + * + * In addition to the call back functions required for each entry, the + * cache requires the following call back functions for this instance of + * the cache as a whole: + * + * check_write_permitted: In certain applications, the cache may not + * be allowed to write to disk at certain time. If specified, + * the check_write_permitted function is used to determine if + * a write is permissible at any given point in time. + * + * If no such function is specified (i.e. this field is NULL), + * the cache uses the following write_permitted field to + * determine whether writes are permitted. + * + * write_permitted: If check_write_permitted is NULL, this boolean flag + * indicates whether writes are permitted. + * + * log_flush: If provided, this function is called whenever a dirty + * entry is flushed to disk. + * + * + * In cases where memory is plentiful, and performance is an issue, it may + * be useful to disable all cache evictions, and thereby postpone metadata + * writes. The following field is used to implement this. + * + * evictions_enabled: Boolean flag that is initialized to TRUE. When + * this flag is set to FALSE, the metadata cache will not + * attempt to evict entries to make space for newly protected + * entries, and instead the will grow without limit. + * + * Needless to say, this feature must be used with care. + * + * + * The cache requires an index to facilitate searching for entries. The + * following fields support that index. + * + * index_len: Number of entries currently in the hash table used to index + * the cache. + * + * index_size: Number of bytes of cache entries currently stored in the + * hash table used to index the cache. + * + * This value should not be mistaken for footprint of the + * cache in memory. The average cache entry is small, and + * the cache has a considerable overhead. Multiplying the + * index_size by two should yield a conservative estimate + * of the cache's memory footprint. + * + * clean_index_size: Number of bytes of clean entries currently stored in + * the hash table. Note that the index_size field (above) + * is also the sum of the sizes of all entries in the cache. + * Thus we should have the invariant that clean_index_size + + * dirty_index_size == index_size. + * + * WARNING: + * + * 1) The clean_index_size field is not maintained by the + * index macros, as the hash table doesn't care whether + * the entry is clean or dirty. Instead the field is + * maintained in the H5C__UPDATE_RP macros. + * + * 2) The value of the clean_index_size must not be mistaken + * for the current clean size of the cache. Rather, the + * clean size of the cache is the current value of + * clean_index_size plus the amount of empty space (if any) + * in the cache. + * + * dirty_index_size: Number of bytes of dirty entries currently stored in + * the hash table. Note that the index_size field (above) + * is also the sum of the sizes of all entries in the cache. + * Thus we should have the invariant that clean_index_size + + * dirty_index_size == index_size. + * + * WARNING: + * + * 1) The dirty_index_size field is not maintained by the + * index macros, as the hash table doesn't care whether + * the entry is clean or dirty. Instead the field is + * maintained in the H5C__UPDATE_RP macros. + * + * index: Array of pointer to H5C_cache_entry_t of size + * H5C__HASH_TABLE_LEN. At present, this value is a power + * of two, not the usual prime number. 
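The trivial hash function described in the next paragraph amounts to a mask and a 3-bit shift of the file address; a sketch consistent with the H5C__HASH_FCN and H5C__HASH_MASK names used in this header (the exact definitions are an assumption here):

    /* Sketch: hash a file address into [0, H5C__HASH_TABLE_LEN).
     * Because the table length is a power of two, a single mask
     * selects the bucket bits; the 3-bit shift drops low-order
     * address bits. */
    #define H5C__HASH_MASK   ((size_t)(H5C__HASH_TABLE_LEN - 1) << 3)
    #define H5C__HASH_FCN(x) (int)(((x) & H5C__HASH_MASK) >> 3)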
+ * + * I hope that the variable size of cache elements, the large + * hash table size, and the way in which HDF5 allocates space + * will combine to avoid problems with periodicity. If so, we + * can use a trivial hash function (a bit-and and a 3 bit left + * shift) with some small savings. + * + * If not, it will become evident in the statistics. Changing + * to the usual prime number length hash table will require + * changing the H5C__HASH_FCN macro and the deletion of the + * H5C__HASH_MASK #define. No other changes should be required. + * + * + * With the addition of cache entry tagging, it is possible that + * an entry may be inserted into the cache without a tag during testing + * and the tag's validity shouldn't be checked. + * + * The following field is maintained to facilitate this. + * + * ignore_tags: Boolean flag to disable tag validation during entry insertion. + * + * When we flush the cache, we need to write entries out in increasing + * address order. An instance of a skip list is used to store dirty entries in + * sorted order. Whether it is cheaper to sort the dirty entries as needed, + * or to maintain the list is an open question. At a guess, it depends + * on how frequently the cache is flushed. We will see how it goes. + * + * For now at least, I will not remove dirty entries from the list as they + * are flushed. (this has been changed -- dirty entries are now removed from + * the skip list as they are flushed. JRM - 10/25/05) + * + * slist_len: Number of entries currently in the skip list + * used to maintain a sorted list of dirty entries in the + * cache. + * + * slist_size: Number of bytes of cache entries currently stored in the + * skip list used to maintain a sorted list of + * dirty entries in the cache. + * + * slist_ptr: pointer to the instance of H5SL_t used maintain a sorted + * list of dirty entries in the cache. This sorted list has + * two uses: + * + * a) It allows us to flush dirty entries in increasing address + * order, which results in significant savings. + * + * b) It facilitates checking for adjacent dirty entries when + * attempting to evict entries from the cache. While we + * don't use this at present, I hope that this will allow + * some optimizations when I get to it. + * + * num_last_entries: The number of entries in the cache that can only be + * flushed after all other entries in the cache have + * been flushed. At this time, this will only ever be + * one entry (the superblock), and the code has been + * protected with HDasserts to enforce this. This restraint + * can certainly be relaxed in the future if the need for + * multiple entries being flushed last arises, though + * explicit tests for that case should be added when said + * HDasserts are removed. + * + * With the addition of the fractal heap, the cache must now deal with + * the case in which entries may be dirtied, moved, or have their sizes + * changed during a flush. To allow sanity checks in this situation, the + * following two fields have been added. They are only compiled in when + * H5C_DO_SANITY_CHECKS is TRUE. + * + * slist_len_increase: Number of entries that have been added to the + * slist since the last time this field was set to zero. + * + * slist_size_increase: Total size of all entries that have been added + * to the slist since the last time this field was set to + * zero. + * + * + * When a cache entry is protected, it must be removed from the LRU + * list(s) as it cannot be either flushed or evicted until it is unprotected. 
+ * The following fields are used to implement the protected list (pl).
+ *
+ * pl_len:  Number of entries currently residing on the protected list.
+ *
+ * pl_size:  Number of bytes of cache entries currently residing on the
+ *      protected list.
+ *
+ * pl_head_ptr:  Pointer to the head of the doubly linked list of protected
+ *      entries.  Note that cache entries on this list are linked
+ *      by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * pl_tail_ptr:  Pointer to the tail of the doubly linked list of protected
+ *      entries.  Note that cache entries on this list are linked
+ *      by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ *
+ * For very frequently used entries, the protect/unprotect overhead can
+ * become burdensome.  To avoid this overhead, I have modified the cache
+ * to allow entries to be "pinned".  A pinned entry is similar to a
+ * protected entry, in the sense that it cannot be evicted, and that
+ * the entry can be modified at any time.
+ *
+ * Pinning an entry has the following implications:
+ *
+ *      1) A pinned entry cannot be evicted.  Thus unprotected
+ *         pinned entries reside in the pinned entry list, instead
+ *         of the LRU list(s) (or other lists maintained by the current
+ *         replacement policy code).
+ *
+ *      2) A pinned entry can be accessed or modified at any time.
+ *         Therefore, the cache must check with the entry owner
+ *         before flushing it.  If permission is denied, the
+ *         cache just skips the entry in the flush.
+ *
+ *      3) A pinned entry can be marked as dirty (and possibly
+ *         change size) while it is unprotected.
+ *
+ *      4) The flush-destroy code must allow pinned entries to
+ *         be unpinned (and possibly unprotected) during the
+ *         flush.
+ *
+ * Since pinned entries cannot be evicted, they must be kept on a pinned
+ * entry list (pel), instead of being entrusted to the replacement policy
+ * code.
+ *
+ * Maintaining the pinned entry list requires the following fields:
+ *
+ * pel_len:  Number of entries currently residing on the pinned
+ *      entry list.
+ *
+ * pel_size:  Number of bytes of cache entries currently residing on
+ *      the pinned entry list.
+ *
+ * pel_head_ptr:  Pointer to the head of the doubly linked list of pinned
+ *      but not protected entries.  Note that cache entries on
+ *      this list are linked by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * pel_tail_ptr:  Pointer to the tail of the doubly linked list of pinned
+ *      but not protected entries.  Note that cache entries on
+ *      this list are linked by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
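+ *
+ * As a sketch of how these doubly linked lists are maintained (hedged --
+ * the real work is done by the H5C__DLL macros defined with the other
+ * package private macros), prepending an entry to the protected list
+ * amounts to:
+ *
+ *     entry_ptr->prev = NULL;
+ *     entry_ptr->next = cache_ptr->pl_head_ptr;
+ *     if(cache_ptr->pl_head_ptr != NULL)
+ *         cache_ptr->pl_head_ptr->prev = entry_ptr;
+ *     else
+ *         cache_ptr->pl_tail_ptr = entry_ptr;
+ *     cache_ptr->pl_head_ptr = entry_ptr;
+ *     cache_ptr->pl_len++;
+ *     cache_ptr->pl_size += entry_ptr->size;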
+ *
+ *
+ * The cache must have a replacement policy, and the fields supporting this
+ * policy must be accessible from this structure.
+ *
+ * While there has been interest in several replacement policies for
+ * this cache, the initial development schedule is tight.  Thus I have
+ * elected to support only a modified LRU (least recently used) policy
+ * for the first cut.
+ *
+ * To further simplify matters, I have simply included the fields needed
+ * by the modified LRU in this structure.  When and if we add support for
+ * other policies, it will probably be easiest to just add the necessary
+ * fields to this structure as well -- we only create one instance of this
+ * structure per file, so the overhead is not excessive.
+ *
+ *
+ * Fields supporting the modified LRU policy:
+ *
+ * See most any OS text for a discussion of the LRU replacement policy.
+ *
+ * When operating in parallel mode, we must ensure that a read does not
+ * cause a write.  If it does, the process will hang, as the write will
+ * be collective and the other processes will not know to participate.
+ *
+ * To deal with this issue, I have modified the usual LRU policy by adding
+ * clean and dirty LRU lists to the usual LRU list.
+ *
+ * The clean LRU list is simply the regular LRU list with all dirty cache
+ * entries removed.
+ *
+ * Similarly, the dirty LRU list is the regular LRU list with all the clean
+ * cache entries removed.
+ *
+ * When reading in parallel mode, we evict from the clean LRU list only.
+ * This implies that we must try to ensure that the clean LRU list is
+ * reasonably well stocked at all times.
+ *
+ * We attempt to do this by trying to flush enough entries on each write
+ * to keep the cLRU_list_size >= min_clean_size (see the sketch following
+ * the field discussions below).
+ *
+ * Even if we start with a completely clean cache, a sequence of protects
+ * without unprotects can empty the clean LRU list.  In this case, the
+ * cache must grow temporarily.  At the next write, we will attempt to
+ * evict enough entries to reduce index_size to less than max_cache_size.
+ * While this will usually be possible, all bets are off if enough entries
+ * are protected.
+ *
+ * Discussions of the individual fields used by the modified LRU replacement
+ * policy follow:
+ *
+ * LRU_list_len:  Number of cache entries currently on the LRU list.
+ *
+ *      Observe that LRU_list_len + pl_len must always equal
+ *      index_len.
+ *
+ * LRU_list_size:  Number of bytes of cache entries currently residing on
+ *      the LRU list.
+ *
+ *      Observe that LRU_list_size + pl_size must always equal
+ *      index_size.
+ *
+ * LRU_head_ptr:  Pointer to the head of the doubly linked LRU list.  Cache
+ *      entries on this list are linked by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * LRU_tail_ptr:  Pointer to the tail of the doubly linked LRU list.  Cache
+ *      entries on this list are linked by their next and prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * cLRU_list_len:  Number of cache entries currently on the clean LRU list.
+ *
+ *      Observe that cLRU_list_len + dLRU_list_len must always
+ *      equal LRU_list_len.
+ *
+ * cLRU_list_size:  Number of bytes of cache entries currently residing on
+ *      the clean LRU list.
+ *
+ *      Observe that cLRU_list_size + dLRU_list_size must always
+ *      equal LRU_list_size.
+ *
+ * cLRU_head_ptr:  Pointer to the head of the doubly linked clean LRU list.
+ *      Cache entries on this list are linked by their aux_next and
+ *      aux_prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * cLRU_tail_ptr:  Pointer to the tail of the doubly linked clean LRU list.
+ *      Cache entries on this list are linked by their aux_next and
+ *      aux_prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * dLRU_list_len:  Number of cache entries currently on the dirty LRU list.
+ *
+ *      Observe that cLRU_list_len + dLRU_list_len must always
+ *      equal LRU_list_len.
+ *
+ * dLRU_list_size:  Number of bytes of cache entries currently residing on
+ *      the dirty LRU list.
+ *
+ *      Observe that cLRU_list_size + dLRU_list_size must always
+ *      equal LRU_list_size.
+ *
+ * dLRU_head_ptr:  Pointer to the head of the doubly linked dirty LRU list.
+ *      Cache entries on this list are linked by their aux_next and
+ *      aux_prev fields.
+ *
+ *      This field is NULL if the list is empty.
+ *
+ * dLRU_tail_ptr:  Pointer to the tail of the doubly linked dirty LRU list.
+ *      Cache entries on this list are linked by their aux_next and
+ *      aux_prev fields.
+ *
+ *      This field is NULL if the list is empty.
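+ *
+ * As a sketch of the min_clean_size heuristic mentioned above (hedged --
+ * the details differ in H5C_make_space_in_cache()), the amount of dirty
+ * data to flush on a write in parallel mode is roughly:
+ *
+ *     size_t space_needed = 0;
+ *
+ *     if(cache_ptr->cLRU_list_size < cache_ptr->min_clean_size)
+ *         space_needed = cache_ptr->min_clean_size -
+ *                        cache_ptr->cLRU_list_size;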
+ *
+ *
+ * Automatic cache size adjustment:
+ *
+ * While the default cache size is adequate for most cases, we can run into
+ * cases where the default is too small.  Ideally, we will let the user
+ * adjust the cache size as required.  However, this is not possible in all
+ * cases.  Thus I have added automatic cache size adjustment code.
+ *
+ * The configuration for the automatic cache size adjustment is stored in
+ * the structure described below:
+ *
+ * size_increase_possible:  Depending on the configuration data given
+ *      in the resize_ctl field, it may or may not be possible
+ *      to increase the size of the cache.  Rather than test for
+ *      all the ways this can happen, we simply set this flag when
+ *      we receive a new configuration.
+ *
+ * flash_size_increase_possible:  Depending on the configuration data given
+ *      in the resize_ctl field, it may or may not be possible
+ *      for a flash size increase to occur.  We set this flag
+ *      whenever we receive a new configuration so as to avoid
+ *      repeated calculations.
+ *
+ * flash_size_increase_threshold:  If a flash cache size increase is
+ *      possible, this field is used to store the minimum size of a
+ *      new entry or size increase needed to trigger a flash cache
+ *      size increase.  Note that this field must be updated whenever
+ *      the size of the cache is changed.
+ *
+ * size_decrease_possible:  Depending on the configuration data given
+ *      in the resize_ctl field, it may or may not be possible
+ *      to decrease the size of the cache.  Rather than test for
+ *      all the ways this can happen, we simply set this flag when
+ *      we receive a new configuration.
+ *
+ * cache_full:  Boolean flag used to keep track of whether the cache is
+ *      full, so we can refrain from increasing the size of a
+ *      cache which hasn't used up the space allotted to it.
+ *
+ *      The field is initialized to FALSE, and then set to TRUE
+ *      whenever we attempt to make space in the cache.
+ *
+ * resize_enabled:  This is another convenience flag which is set whenever
+ *      a new set of values for resize_ctl is provided.  Very
+ *      simply,
+ *
+ *          resize_enabled = size_increase_possible ||
+ *                           size_decrease_possible;
+ *
+ * size_decreased:  Boolean flag set to TRUE whenever the maximum cache
+ *      size is decreased.  The flag triggers a call to
+ *      H5C_make_space_in_cache() on the next call to H5C_protect().
+ *
+ * resize_ctl:  Instance of H5C_auto_size_ctl_t containing configuration
+ *      data for automatic cache resizing.
+ *
+ * epoch_markers_active:  Integer field containing the number of epoch
+ *      markers currently in use in the LRU list.  This value
+ *      must be in the range [0, H5C__MAX_EPOCH_MARKERS].
+ *
+ * epoch_marker_active:  Array of boolean of length H5C__MAX_EPOCH_MARKERS.
+ *      This array is used to track which epoch markers are currently
+ *      in use.
+ *
+ * epoch_marker_ringbuf:  Array of int of length H5C__MAX_EPOCH_MARKERS + 1.
+ *
+ *      To manage the epoch marker cache entries, it is necessary
+ *      to track their order in the LRU list.  This is done with
+ *      epoch_marker_ringbuf.  When markers are inserted at the
+ *      head of the LRU list, the index of the marker in the
+ *      epoch_markers array is inserted at the tail of the ring
+ *      buffer.  When it becomes the epoch_markers_active'th marker
+ *      in the LRU list, it will have worked its way to the head
+ *      of the ring buffer as well.  This allows us to remove it
+ *      without scanning the LRU list if such is required.
+ *
+ * epoch_marker_ringbuf_first:  Integer field containing the index of the
+ *      first entry in the ring buffer.
+ *
+ * epoch_marker_ringbuf_last:  Integer field containing the index of the
+ *      last entry in the ring buffer.
+ *
+ * epoch_marker_ringbuf_size:  Integer field containing the number of
+ *      entries in the ring buffer.
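+ *
+ * As a sketch of the ring buffer operations described above (hedged --
+ * the real code lives in the epoch marker insertion and removal
+ * routines in H5C.c), enqueuing a marker index i at the tail and
+ * dequeuing the oldest index from the head amount to:
+ *
+ *     cache_ptr->epoch_marker_ringbuf_last =
+ *         (cache_ptr->epoch_marker_ringbuf_last + 1) %
+ *         (H5C__MAX_EPOCH_MARKERS + 1);
+ *     (cache_ptr->epoch_marker_ringbuf)[cache_ptr->epoch_marker_ringbuf_last] = i;
+ *     cache_ptr->epoch_marker_ringbuf_size += 1;
+ *
+ *     i = (cache_ptr->epoch_marker_ringbuf)[cache_ptr->epoch_marker_ringbuf_first];
+ *     cache_ptr->epoch_marker_ringbuf_first =
+ *         (cache_ptr->epoch_marker_ringbuf_first + 1) %
+ *         (H5C__MAX_EPOCH_MARKERS + 1);
+ *     cache_ptr->epoch_marker_ringbuf_size -= 1;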
+ *
+ * epoch_markers:  Array of instances of H5C_cache_entry_t of length
+ *      H5C__MAX_EPOCH_MARKERS.  The entries are used as markers
+ *      in the LRU list to identify cache entries that haven't
+ *      been accessed for some (small) specified number of
+ *      epochs.  These entries (if any) can then be evicted and
+ *      the cache size reduced -- ideally without evicting any
+ *      of the current working set.  Needless to say, the epoch
+ *      length and the number of epochs before an unused entry is
+ *      evicted must be chosen so that all, or almost all, of the
+ *      working set will be accessed before the limit.
+ *
+ *      Epoch markers only appear in the LRU list, never in
+ *      the index or slist.  While they are of type
+ *      H5C__EPOCH_MARKER_TYPE, and have associated class
+ *      functions, these functions should never be called.
+ *
+ *      The addr fields of these instances of H5C_cache_entry_t
+ *      are set to the index of the instance in the epoch_markers
+ *      array, the size is set to 0, and the type field points
+ *      to the constant structure epoch_marker_class defined
+ *      in H5C.c.  The next and prev fields are used as usual
+ *      to link the entry into the LRU list.
+ *
+ *      All other fields are unused.
+ *
+ *
+ * Cache hit rate collection fields:
+ *
+ * We supply the current cache hit rate on request, so we must keep a
+ * simple cache hit rate computation regardless of whether statistics
+ * collection is enabled.  The following fields support this capability.
+ *
+ * cache_hits:  Number of cache hits since the last time the cache hit
+ *      rate statistics were reset.  Note that when automatic cache
+ *      re-sizing is enabled, this field will be reset every automatic
+ *      resize epoch.
+ *
+ * cache_accesses:  Number of times the cache has been accessed since
+ *      the last time the cache hit rate statistics were reset.
+ *      Note that when automatic cache re-sizing is enabled, this
+ *      field will be reset every automatic resize epoch.
+ *
+ *
+ * Statistics collection fields:
+ *
+ * When enabled, these fields are used to collect statistics as described
+ * below.  The first set is collected only when H5C_COLLECT_CACHE_STATS
+ * is true.
+ *
+ * hits:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type id
+ *      equal to the array index has been in cache when requested in
+ *      the current epoch.
+ *
+ * misses:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type id
+ *      equal to the array index has not been in cache when
+ *      requested in the current epoch.
+ *
+ * write_protects:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry with
+ *      type id equal to the array index has been write protected
+ *      in the current epoch.
+ *
+ *      Observe that (hits + misses) = (write_protects + read_protects).
+ *
+ * read_protects:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry with
+ *      type id equal to the array index has been read protected in
+ *      the current epoch.
+ *
+ *      Observe that (hits + misses) = (write_protects + read_protects).
+ *
+ * max_read_protects:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the maximum number of simultaneous
+ *      read protects on any entry with type id equal to the array
+ *      index in the current epoch.
+ *
+ * insertions:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry with type
+ *      id equal to the array index has been inserted into the
+ *      cache in the current epoch.
+ *
+ * pinned_insertions:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry
+ *      with type id equal to the array index has been inserted
+ *      pinned into the cache in the current epoch.
+ *
+ * clears:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times a dirty entry with type
+ *      id equal to the array index has been cleared in the current
+ *      epoch.
+ *
+ * flushes:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type id
+ *      equal to the array index has been written to disk in the
+ *      current epoch.
+ *
+ * evictions:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry with
+ *      type id equal to the array index has been evicted from the
+ *      cache in the current epoch.
+ *
+ * moves:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type
+ *      id equal to the array index has been moved in the current
+ *      epoch.
+ *
+ * entry_flush_moves:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry
+ *      with type id equal to the array index has been moved
+ *      during its flush callback in the current epoch.
+ *
+ * cache_flush_moves:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry
+ *      with type id equal to the array index has been moved
+ *      during a cache flush in the current epoch.
+ *
+ * pins:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type
+ *      id equal to the array index has been pinned in the current
+ *      epoch.
+ *
+ * unpins:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The cells
+ *      are used to record the number of times an entry with type
+ *      id equal to the array index has been unpinned in the current
+ *      epoch.
+ *
+ * dirty_pins:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry with
+ *      type id equal to the array index has been marked dirty while
+ *      pinned in the current epoch.
+ *
+ * pinned_flushes:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry
+ *      with type id equal to the array index has been flushed while
+ *      pinned in the current epoch.
+ *
+ * pinned_clears:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the number of times an entry
+ *      with type id equal to the array index has been cleared while
+ *      pinned in the current epoch.
+ *
+ * size_increases:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry
+ *      with type id equal to the array index has increased in
+ *      size in the current epoch.
+ *
+ * size_decreases:  Array of int64 of length H5C__MAX_NUM_TYPE_IDS + 1.
+ *      The cells are used to record the number of times an entry
+ *      with type id equal to the array index has decreased in
+ *      size in the current epoch.
+ *
+ * entry_flush_size_changes:  Array of int64 of length
+ *      H5C__MAX_NUM_TYPE_IDS + 1.  The cells are used to record
+ *      the number of times an entry with type id equal to the
+ *      array index has changed size while in its flush callback.
+ *
+ * cache_flush_size_changes:  Array of int64 of length
+ *      H5C__MAX_NUM_TYPE_IDS + 1.  The cells are used to record
+ *      the number of times an entry with type id equal to the
+ *      array index has changed size during a cache flush.
+ *
+ * total_ht_insertions:  Number of times entries have been inserted into
+ *      the hash table in the current epoch.
+ *
+ * total_ht_deletions:  Number of times entries have been deleted from the
+ *      hash table in the current epoch.
+ *
+ * successful_ht_searches:  int64 containing the total number of successful
+ *      searches of the hash table in the current epoch.
+ *
+ * total_successful_ht_search_depth:  int64 containing the total number of
+ *      entries other than the targets examined in successful
+ *      searches of the hash table in the current epoch.
+ *
+ * failed_ht_searches:  int64 containing the total number of unsuccessful
+ *      searches of the hash table in the current epoch.
+ *
+ * total_failed_ht_search_depth:  int64 containing the total number of
+ *      entries examined in unsuccessful searches of the hash
+ *      table in the current epoch.
+ *
+ * max_index_len:  Largest value attained by the index_len field in the
+ *      current epoch.
+ *
+ * max_index_size:  Largest value attained by the index_size field in the
+ *      current epoch.
+ *
+ * max_clean_index_size:  Largest value attained by the clean_index_size
+ *      field in the current epoch.
+ *
+ * max_dirty_index_size:  Largest value attained by the dirty_index_size
+ *      field in the current epoch.
+ *
+ * max_slist_len:  Largest value attained by the slist_len field in the
+ *      current epoch.
+ *
+ * max_slist_size:  Largest value attained by the slist_size field in the
+ *      current epoch.
+ *
+ * max_pl_len:  Largest value attained by the pl_len field in the
+ *      current epoch.
+ *
+ * max_pl_size:  Largest value attained by the pl_size field in the
+ *      current epoch.
+ *
+ * max_pel_len:  Largest value attained by the pel_len field in the
+ *      current epoch.
+ *
+ * max_pel_size:  Largest value attained by the pel_size field in the
+ *      current epoch.
+ *
+ * calls_to_msic:  Total number of calls to H5C_make_space_in_cache().
+ *
+ * total_entries_skipped_in_msic:  Number of clean entries skipped while
+ *      enforcing the min_clean_fraction in H5C_make_space_in_cache().
+ *
+ * total_entries_scanned_in_msic:  Number of entries scanned while
+ *      enforcing the min_clean_fraction in H5C_make_space_in_cache().
+ *
+ * max_entries_skipped_in_msic:  Maximum number of clean entries skipped
+ *      in any one call to H5C_make_space_in_cache().
+ *
+ * max_entries_scanned_in_msic:  Maximum number of entries scanned over
+ *      in any one call to H5C_make_space_in_cache().
+ *
+ * entries_scanned_to_make_space:  Number of entries scanned only when
+ *      looking for entries to evict in order to make space in cache.
+ *
+ * The remaining stats are collected only when both H5C_COLLECT_CACHE_STATS
+ * and H5C_COLLECT_CACHE_ENTRY_STATS are true.
+ *
+ * max_accesses:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the maximum number of times any single
+ *      entry with type id equal to the array index has been
+ *      accessed in the current epoch.
+ *
+ * min_accesses:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the minimum number of times any single
+ *      entry with type id equal to the array index has been
+ *      accessed in the current epoch.
+ *
+ * max_clears:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the maximum number of times any single
+ *      entry with type id equal to the array index has been cleared
+ *      in the current epoch.
+ *
+ * max_flushes:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the maximum number of times any single
+ *      entry with type id equal to the array index has been
+ *      flushed in the current epoch.
+ *
+ * max_size:  Array of size_t of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the maximum size of any single entry
+ *      with type id equal to the array index that has resided in
+ *      the cache in the current epoch.
+ *
+ * max_pins:  Array of int32 of length H5C__MAX_NUM_TYPE_IDS + 1.  The
+ *      cells are used to record the maximum number of times any
+ *      single entry with type id equal to the array index has been
+ *      marked as pinned in the cache in the current epoch.
+ *
+ *
+ * Fields supporting testing:
+ *
+ * prefix:  Array of char used to prefix debugging output.  The
+ *      field is intended to allow marking of output with the
+ *      process's MPI rank.
+ *
+ * get_entry_ptr_from_addr_counter:  Counter used to track the number of
+ *      times the H5C_get_entry_ptr_from_addr() function has been
+ *      called successfully.  This field is only defined when
+ *      NDEBUG is not #defined.
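+ *
+ * As a sketch of the hit rate computation supported by the cache_hits
+ * and cache_accesses fields above (hedged -- the real routine also
+ * deals with resets of the statistics), the calculation reduces to:
+ *
+ *     if(cache_ptr->cache_accesses > 0)
+ *         hit_rate = ((double)(cache_ptr->cache_hits)) /
+ *                    ((double)(cache_ptr->cache_accesses));
+ *     else
+ *         hit_rate = 0.0;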
+ *
+ ****************************************************************************/
+struct H5C_t {
+    uint32_t                    magic;
+    hbool_t                     flush_in_progress;
+    FILE *                      trace_file_ptr;
+    void *                      aux_ptr;
+    int32_t                     max_type_id;
+    const char *                (* type_name_table_ptr);
+    size_t                      max_cache_size;
+    size_t                      min_clean_size;
+    H5C_write_permitted_func_t  check_write_permitted;
+    hbool_t                     write_permitted;
+    H5C_log_flush_func_t        log_flush;
+    hbool_t                     evictions_enabled;
+
+    /* Fields for maintaining [hash table] index of entries */
+    int32_t                     index_len;
+    size_t                      index_size;
+    size_t                      clean_index_size;
+    size_t                      dirty_index_size;
+    H5C_cache_entry_t *         (index[H5C__HASH_TABLE_LEN]);
+
+    /* Field to disable tag validation */
+    hbool_t                     ignore_tags;
+
+    int32_t                     slist_len;
+    size_t                      slist_size;
+    H5SL_t *                    slist_ptr;
+    int32_t                     num_last_entries;
+#if H5C_DO_SANITY_CHECKS
+    int64_t                     slist_len_increase;
+    int64_t                     slist_size_increase;
+#endif /* H5C_DO_SANITY_CHECKS */
+
+    /* Fields for tracking protected entries */
+    int32_t                     pl_len;
+    size_t                      pl_size;
+    H5C_cache_entry_t *         pl_head_ptr;
+    H5C_cache_entry_t *         pl_tail_ptr;
+
+    /* Fields for tracking pinned entries */
+    int32_t                     pel_len;
+    size_t                      pel_size;
+    H5C_cache_entry_t *         pel_head_ptr;
+    H5C_cache_entry_t *         pel_tail_ptr;
+
+    /* Fields for complete LRU list of entries */
+    int32_t                     LRU_list_len;
+    size_t                      LRU_list_size;
+    H5C_cache_entry_t *         LRU_head_ptr;
+    H5C_cache_entry_t *         LRU_tail_ptr;
+
+    /* Fields for clean LRU list of entries */
+    int32_t                     cLRU_list_len;
+    size_t                      cLRU_list_size;
+    H5C_cache_entry_t *         cLRU_head_ptr;
+    H5C_cache_entry_t *         cLRU_tail_ptr;
+
+    /* Fields for dirty LRU list of entries */
+    int32_t                     dLRU_list_len;
+    size_t                      dLRU_list_size;
+    H5C_cache_entry_t *         dLRU_head_ptr;
+    H5C_cache_entry_t *         dLRU_tail_ptr;
+
+    /* Fields for automatic cache size adjustment */
+    hbool_t                     size_increase_possible;
+    hbool_t                     flash_size_increase_possible;
+    size_t                      flash_size_increase_threshold;
+    hbool_t                     size_decrease_possible;
+    hbool_t                     resize_enabled;
+    hbool_t                     cache_full;
+    hbool_t                     size_decreased;
+    H5C_auto_size_ctl_t         resize_ctl;
+
+    /* Fields for epoch markers used in automatic cache size adjustment */
+    int32_t                     epoch_markers_active;
+    hbool_t                     epoch_marker_active[H5C__MAX_EPOCH_MARKERS];
+    int32_t                     epoch_marker_ringbuf[H5C__MAX_EPOCH_MARKERS+1];
+    int32_t                     epoch_marker_ringbuf_first;
+    int32_t                     epoch_marker_ringbuf_last;
+    int32_t                     epoch_marker_ringbuf_size;
+    H5C_cache_entry_t           epoch_markers[H5C__MAX_EPOCH_MARKERS];
+
+    /* Fields for cache hit rate collection */
+    int64_t                     cache_hits;
+    int64_t                     cache_accesses;
+
+#if H5C_COLLECT_CACHE_STATS
+    /* stats fields */
+    int64_t                     hits[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     misses[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     write_protects[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     read_protects[H5C__MAX_NUM_TYPE_IDS + 1];
+    int32_t                     max_read_protects[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     insertions[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     pinned_insertions[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     clears[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     flushes[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     evictions[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     moves[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     entry_flush_moves[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     cache_flush_moves[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     pins[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     unpins[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     dirty_pins[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     pinned_flushes[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     pinned_clears[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     size_increases[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     size_decreases[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     entry_flush_size_changes[H5C__MAX_NUM_TYPE_IDS + 1];
+    int64_t                     cache_flush_size_changes[H5C__MAX_NUM_TYPE_IDS + 1];
+
+    /* Fields for hash table operations */
+    int64_t                     total_ht_insertions;
+    int64_t                     total_ht_deletions;
+    int64_t                     successful_ht_searches;
+    int64_t                     total_successful_ht_search_depth;
+    int64_t                     failed_ht_searches;
+    int64_t                     total_failed_ht_search_depth;
+    int32_t                     max_index_len;
+    size_t                      max_index_size;
+    size_t                      max_clean_index_size;
+    size_t                      max_dirty_index_size;
+
+    /* Fields for in-order skip list */
+    int32_t                     max_slist_len;
+    size_t                      max_slist_size;
+
+    /* Fields for protected entry list */
+    int32_t                     max_pl_len;
+    size_t                      max_pl_size;
+
+    /* Fields for pinned entry list */
+    int32_t                     max_pel_len;
+    size_t                      max_pel_size;
+
+    /* Fields for tracking 'make space in cache' (msic) operations */
+    int64_t                     calls_to_msic;
+    int64_t                     total_entries_skipped_in_msic;
+    int64_t                     total_entries_scanned_in_msic;
+    int32_t                     max_entries_skipped_in_msic;
+    int32_t                     max_entries_scanned_in_msic;
+    int64_t                     entries_scanned_to_make_space;
+
+#if H5C_COLLECT_CACHE_ENTRY_STATS
+    int32_t                     max_accesses[H5C__MAX_NUM_TYPE_IDS + 1];
+    int32_t                     min_accesses[H5C__MAX_NUM_TYPE_IDS + 1];
+    int32_t                     max_clears[H5C__MAX_NUM_TYPE_IDS + 1];
+    int32_t                     max_flushes[H5C__MAX_NUM_TYPE_IDS + 1];
+    size_t                      max_size[H5C__MAX_NUM_TYPE_IDS + 1];
+    int32_t                     max_pins[H5C__MAX_NUM_TYPE_IDS + 1];
+#endif /* H5C_COLLECT_CACHE_ENTRY_STATS */
+#endif /* H5C_COLLECT_CACHE_STATS */
+
+    char                        prefix[H5C__PREFIX_LEN];
+
+#ifndef NDEBUG
+    int64_t                     get_entry_ptr_from_addr_counter;
+#endif /* NDEBUG */
+};
+
+/*****************************/
+/* Package Private Variables */
+/*****************************/
+
+
+/******************************/
+/* Package Private Prototypes */
+/******************************/
+
+
+#endif /* _H5Cpkg_H */
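For illustration only (this helper is hypothetical and not part of the patch),
the clean/dirty index size invariant documented in the comment above could be
spot-checked with something along these lines:

    /* Sanity check: the documented invariant is that the clean and dirty
     * index sizes partition the total index size.
     */
    static hbool_t
    H5C__check_index_sizes(const struct H5C_t *cache_ptr)
    {
        return (hbool_t)((cache_ptr->clean_index_size +
                          cache_ptr->dirty_index_size) == cache_ptr->index_size);
    }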