summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authormainzer <mainzer#hdfgroup.org>2018-11-04 23:54:01 (GMT)
committermainzer <mainzer#hdfgroup.org>2018-11-04 23:54:01 (GMT)
commite62f4bd4fab00b0cd03e269a858c21558a9321fa (patch)
tree49506dd4873e1413b6c960ebcaf22c269933411c /src
parentd4d7687ad1df35101ed72567c99f1c57536b5ccd (diff)
downloadhdf5-e62f4bd4fab00b0cd03e269a858c21558a9321fa.zip
hdf5-e62f4bd4fab00b0cd03e269a858c21558a9321fa.tar.gz
hdf5-e62f4bd4fab00b0cd03e269a858c21558a9321fa.tar.bz2
Initial checkin of page buffer re-implementation to support VFD SWMR.
Tested serial / debug on Charis and Jelly. Two known issues: 1) New page buffer seems to expose issues in the accumulator code. For whatever reason, fheap with the new page buffer exposes corruption issues if the page buffer uses H5F__accum_read/write(), but the problems go away if the page buffer uses H5FD_read/write() instead. Need to either chase this down or disable the page buffer in combination with the accumulator. 2) Encountered a number of assertion failures that are explainable by the free space manager code not telling the page buffer to discard pages when they are freed. Wrote code to handle this -- once the free space manager is modified, this code should be removed and the original assertions restored.
Diffstat (limited to 'src')
-rw-r--r--src/H5C.c4
-rw-r--r--src/H5F.c8
-rw-r--r--src/H5Fint.c8
-rw-r--r--src/H5Fpkg.h4
-rw-r--r--src/H5MF.c2
-rw-r--r--src/H5MFsection.c2
-rw-r--r--src/H5PB.c4147
-rw-r--r--src/H5PBpkg.h1498
-rw-r--r--src/H5PBprivate.h566
9 files changed, 5147 insertions, 1092 deletions
diff --git a/src/H5C.c b/src/H5C.c
index a22eca0..52c8dfa 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -6412,8 +6412,8 @@ H5C__flush_single_entry(H5F_t *f, H5C_cache_entry_t *entry_ptr, unsigned flags)
HDassert(!destroy);
HDassert(entry_ptr->image_ptr);
- if(f->shared->page_buf && f->shared->page_buf->page_size >= entry_ptr->size)
- if(H5PB_update_entry(f->shared->page_buf, entry_ptr->addr, entry_ptr->size, entry_ptr->image_ptr) > 0)
+ if(f->shared->pb_ptr && f->shared->pb_ptr->page_size >= entry_ptr->size)
+ if(H5PB_update_entry(f->shared->pb_ptr, entry_ptr->addr, entry_ptr->size, entry_ptr->image_ptr) > 0)
HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "Failed to update PB with metadata cache")
} /* end if */
diff --git a/src/H5F.c b/src/H5F.c
index e168343..4cda672 100644
--- a/src/H5F.c
+++ b/src/H5F.c
@@ -1822,11 +1822,11 @@ H5Freset_page_buffering_stats(hid_t file_id)
/* Check args */
if(NULL == (file = (H5F_t *)H5I_object(file_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "invalid file identifier")
- if(NULL == file->shared->page_buf)
+ if(NULL == file->shared->pb_ptr)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "page buffering not enabled on file")
/* Reset the statistics */
- if(H5PB_reset_stats(file->shared->page_buf) < 0)
+ if(H5PB_reset_stats(file->shared->pb_ptr) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "can't reset stats for page buffering")
done:
@@ -1857,13 +1857,13 @@ H5Fget_page_buffering_stats(hid_t file_id, unsigned accesses[2], unsigned hits[2
/* Check args */
if(NULL == (file = (H5F_t *)H5I_object_verify(file_id, H5I_FILE)))
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "not a file ID")
- if(NULL == file->shared->page_buf)
+ if(NULL == file->shared->pb_ptr)
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "page buffering not enabled on file")
if(NULL == accesses || NULL == hits || NULL == misses || NULL == evictions || NULL == bypasses)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "NULL input parameters for stats")
/* Get the statistics */
- if(H5PB_get_stats(file->shared->page_buf, accesses, hits, misses, evictions, bypasses) < 0)
+ if(H5PB_get_stats(file->shared->pb_ptr, accesses, hits, misses, evictions, bypasses) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't retrieve stats for page buffering")
done:
diff --git a/src/H5Fint.c b/src/H5Fint.c
index f815a4b..bca09b2 100644
--- a/src/H5Fint.c
+++ b/src/H5Fint.c
@@ -233,12 +233,12 @@ H5F_get_access_plist(H5F_t *f, hbool_t app_ref)
efc_size = H5F__efc_max_nfiles(f->shared->efc);
if(H5P_set(new_plist, H5F_ACS_EFC_SIZE_NAME, &efc_size) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, H5I_INVALID_HID, "can't set elink file cache size")
- if(f->shared->page_buf != NULL) {
- if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_SIZE_NAME, &(f->shared->page_buf->max_size)) < 0)
+ if(f->shared->pb_ptr != NULL) {
+ if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_SIZE_NAME, &(f->shared->pb_ptr->max_size)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, H5I_INVALID_HID, "can't set page buffer size")
- if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_MIN_META_PERC_NAME, &(f->shared->page_buf->min_meta_perc)) < 0)
+ if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_MIN_META_PERC_NAME, &(f->shared->pb_ptr->min_meta_perc)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, H5I_INVALID_HID, "can't set minimum metadata fraction of page buffer")
- if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_MIN_RAW_PERC_NAME, &(f->shared->page_buf->min_raw_perc)) < 0)
+ if(H5P_set(new_plist, H5F_ACS_PAGE_BUFFER_MIN_RAW_PERC_NAME, &(f->shared->pb_ptr->min_raw_perc)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, H5I_INVALID_HID, "can't set minimum raw data fraction of page buffer")
} /* end if */
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index 3760c41..9e523de 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -295,7 +295,9 @@ struct H5F_file_t {
unsigned long feature_flags; /* VFL Driver feature Flags */
haddr_t maxaddr; /* Maximum address for file */
- H5PB_t *page_buf; /* The page buffer cache */
+ H5PB_t *pb_ptr; /* pointer to the page buffer, or NULL */
+ /* if the page buffer is disabled. */
+
H5AC_t *cache; /* The object cache */
H5AC_cache_config_t
mdc_initCacheCfg; /* initial configuration for the */
diff --git a/src/H5MF.c b/src/H5MF.c
index 0d368d4..3be5e0a 100644
--- a/src/H5MF.c
+++ b/src/H5MF.c
@@ -981,7 +981,7 @@ HDfprintf(stderr, "%s: alloc_type = %u, size = %Hu\n", FUNC, (unsigned)alloc_typ
/* Insert the new page into the Page Buffer list of new pages so
we don't read an empty page from disk */
- if(f->shared->page_buf != NULL && H5PB_add_new_page(f, alloc_type, new_page) < 0)
+ if(f->shared->pb_ptr != NULL && H5PB_add_new_page(f, alloc_type, new_page) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTINSERT, HADDR_UNDEF, "can't add new page to Page Buffer new page list")
ret_value = new_page;
diff --git a/src/H5MFsection.c b/src/H5MFsection.c
index 3ebc5c8..1d217a1 100644
--- a/src/H5MFsection.c
+++ b/src/H5MFsection.c
@@ -775,7 +775,7 @@ H5MF__sect_small_merge(H5FS_section_info_t **_sect1, H5FS_section_info_t *_sect2
/* This is in response to the data corruption bug from fheap.c with page buffering + page strategy */
/* Note: Large metadata page bypasses the PB cache */
/* Note: Update of raw data page (large or small sized) is handled by the PB cache */
- if(udata->f->shared->page_buf != NULL && udata->alloc_type != H5FD_MEM_DRAW)
+ if(udata->f->shared->pb_ptr != NULL && udata->alloc_type != H5FD_MEM_DRAW)
if(H5PB_remove_entry(udata->f, (*sect1)->sect_info.addr) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTFREE, FAIL, "can't free merged section")
diff --git a/src/H5PB.c b/src/H5PB.c
index a325ad1..c89c381 100644
--- a/src/H5PB.c
+++ b/src/H5PB.c
@@ -13,9 +13,11 @@
/*-------------------------------------------------------------------------
*
- * Created: H5PB.c
- *
- * Purpose: Page Buffer routines.
+ * Created: H5PB2.c
+ *
+ * Purpose: Re-implementation of the page buffer with added features to
+ * support VFD SWMR.
+ * JRM -- 10/11/18
*
*-------------------------------------------------------------------------
*/
@@ -24,8 +26,10 @@
/* Module Setup */
/****************/
-#define H5F_FRIEND /*suppress error about including H5Fpkg */
-#include "H5PBmodule.h" /* This source code file is part of the H5PB module */
+#define H5F_FRIEND /* suppress error about including H5Fpkg */
+#include "H5PBmodule.h" /* This source code file is part of the
+ * H5PB module
+ */
/***********/
@@ -36,83 +40,30 @@
#include "H5Fpkg.h" /* Files */
#include "H5FDprivate.h" /* File drivers */
#include "H5Iprivate.h" /* IDs */
+#include "H5FLprivate.h" /* Free lists */
+#include "H5MMprivate.h" /* Memory management */
#include "H5PBpkg.h" /* File access */
-#include "H5SLprivate.h" /* Skip List */
/****************/
/* Local Macros */
/****************/
-#define H5PB__PREPEND(page_ptr, head_ptr, tail_ptr, len) { \
- if((head_ptr) == NULL) { \
- (head_ptr) = (page_ptr); \
- (tail_ptr) = (page_ptr); \
- } /* end if */ \
- else { \
- (head_ptr)->prev = (page_ptr); \
- (page_ptr)->next = (head_ptr); \
- (head_ptr) = (page_ptr); \
- } /* end else */ \
- (len)++; \
-} /* H5PB__PREPEND() */
-
-#define H5PB__REMOVE(page_ptr, head_ptr, tail_ptr, len) { \
- if((head_ptr) == (page_ptr)) { \
- (head_ptr) = (page_ptr)->next; \
- if((head_ptr) != NULL) \
- (head_ptr)->prev = NULL; \
- } /* end if */ \
- else \
- (page_ptr)->prev->next = (page_ptr)->next; \
- if((tail_ptr) == (page_ptr)) { \
- (tail_ptr) = (page_ptr)->prev; \
- if((tail_ptr) != NULL) \
- (tail_ptr)->next = NULL; \
- } /* end if */ \
- else \
- (page_ptr)->next->prev = (page_ptr)->prev; \
- page_ptr->next = NULL; \
- page_ptr->prev = NULL; \
- (len)--; \
-}
-
-#define H5PB__INSERT_LRU(page_buf, page_ptr) { \
- HDassert(page_buf); \
- HDassert(page_ptr); \
- /* insert the entry at the head of the list. */ \
- H5PB__PREPEND((page_ptr), (page_buf)->LRU_head_ptr, \
- (page_buf)->LRU_tail_ptr, (page_buf)->LRU_list_len) \
-}
-
-#define H5PB__REMOVE_LRU(page_buf, page_ptr) { \
- HDassert(page_buf); \
- HDassert(page_ptr); \
- /* remove the entry from the list. */ \
- H5PB__REMOVE((page_ptr), (page_buf)->LRU_head_ptr, \
- (page_buf)->LRU_tail_ptr, (page_buf)->LRU_list_len) \
-}
-
-#define H5PB__MOVE_TO_TOP_LRU(page_buf, page_ptr) { \
- HDassert(page_buf); \
- HDassert(page_ptr); \
- /* Remove entry and insert at the head of the list. */ \
- H5PB__REMOVE((page_ptr), (page_buf)->LRU_head_ptr, \
- (page_buf)->LRU_tail_ptr, (page_buf)->LRU_list_len) \
- H5PB__PREPEND((page_ptr), (page_buf)->LRU_head_ptr, \
- (page_buf)->LRU_tail_ptr, (page_buf)->LRU_list_len) \
-}
+
+/* In principle, we should be able to run the page buffer with the
+ * accumulator. However, for whatever reason, the fheap test encounters
+ * metadata corruption if the page buffer uses H5F__accum_read/write()
+ * for I/O.
+ *
+ * The following #define controls this. Set VFD_IO to FALSE to reproduce
+ * the bug.
+ */
+#define VFD_IO TRUE
/******************/
/* Local Typedefs */
/******************/
-/* Iteration context for destroying page buffer */
-typedef struct {
- H5PB_t *page_buf;
- hbool_t actual_slist;
-} H5PB_ud1_t;
-
/********************/
/* Package Typedefs */
@@ -122,9 +73,44 @@ typedef struct {
/********************/
/* Local Prototypes */
/********************/
-static herr_t H5PB__insert_entry(H5PB_t *page_buf, H5PB_entry_t *page_entry);
-static htri_t H5PB__make_space(H5F_t *f, H5PB_t *page_buf, H5FD_mem_t inserted_type);
-static herr_t H5PB__write_entry(H5F_t *f, H5PB_entry_t *page_entry);
+
+static H5PB_entry_t * H5PB__allocate_page(H5PB_t *pb_ptr, size_t buf_size,
+ hbool_t clean_image);
+
+static herr_t H5PB__create_new_page(H5PB_t *pb_ptr, haddr_t addr, size_t size,
+ H5FD_mem_t type, hbool_t clean_image, H5PB_entry_t **entry_ptr_ptr);
+
+static void H5PB__deallocate_page(H5PB_entry_t *entry_ptr);
+
+static herr_t H5PB__evict_entry(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr,
+ hbool_t force);
+
+static herr_t H5PB__flush_entry(H5F_t *f, H5PB_t *pb_ptr,
+ H5PB_entry_t *entry_ptr);
+
+static herr_t H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr,
+ H5FD_mem_t type, H5PB_entry_t **entry_ptr_ptr);
+
+static herr_t H5PB__make_space(H5F_t *f, H5PB_t *pb_ptr,
+ H5FD_mem_t inserted_type);
+
+static herr_t H5PB__mark_entry_clean(H5PB_t *pb_ptr,
+ H5PB_entry_t *entry_ptr);
+
+static herr_t H5PB__mark_entry_dirty(H5PB_t *pb_ptr,
+ H5PB_entry_t *entry_ptr);
+
+static herr_t H5PB__read_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr,
+ size_t size, void *buf/*out*/);
+
+static herr_t H5PB__read_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr,
+ size_t size, void *buf/*out*/);
+
+static herr_t H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr,
+ size_t size, const void *buf/*out*/);
+
+static herr_t H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr,
+ size_t size, const void *buf/*out*/);
/*********************/
@@ -143,6 +129,8 @@ hbool_t H5_PKG_INIT_VAR = FALSE;
/*******************/
/* Local Variables */
/*******************/
+
+
/* Declare a free list to manage the H5PB_t struct */
H5FL_DEFINE_STATIC(H5PB_t);
@@ -152,39 +140,70 @@ H5FL_DEFINE_STATIC(H5PB_entry_t);
/*-------------------------------------------------------------------------
- * Function: H5PB_reset_stats
*
- * Purpose: This function was created without documentation.
- * What follows is my best understanding of Mohamad's intent.
+ * Function: H5PB_reset_stats
*
- * Reset statistics collected for the page buffer layer.
+ * Purpose: Reset statistics collected for the page buffer layer.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/12/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_reset_stats(H5PB_t *page_buf)
+H5PB_reset_stats(H5PB_t *pb_ptr)
{
+ int i;
+
FUNC_ENTER_NOAPI_NOERR
/* Sanity checks */
- HDassert(page_buf);
-
- page_buf->accesses[0] = 0;
- page_buf->accesses[1] = 0;
- page_buf->hits[0] = 0;
- page_buf->hits[1] = 0;
- page_buf->misses[0] = 0;
- page_buf->misses[1] = 0;
- page_buf->evictions[0] = 0;
- page_buf->evictions[1] = 0;
- page_buf->bypasses[0] = 0;
- page_buf->bypasses[1] = 0;
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ for ( i = 0; i < H5PB__NUM_STAT_TYPES; i++ ) {
+
+ pb_ptr->bypasses[i] = 0;
+ pb_ptr->accesses[i] = 0;
+ pb_ptr->hits[i] = 0;
+ pb_ptr->misses[i] = 0;
+ pb_ptr->loads[i] = 0;
+ pb_ptr->insertions[i] = 0;
+ pb_ptr->flushes[i] = 0;
+ pb_ptr->evictions[i] = 0;
+ pb_ptr->clears[i] = 0;
+ }
+
+ pb_ptr->max_lru_len = 0;
+ pb_ptr->max_lru_size = 0;
+ pb_ptr->lru_md_skips = 0;
+ pb_ptr->lru_rd_skips = 0;
+ pb_ptr->total_ht_insertions = 0;
+ pb_ptr->total_ht_deletions = 0;
+ pb_ptr->successful_ht_searches = 0;
+ pb_ptr->total_successful_ht_search_depth = 0;
+ pb_ptr->failed_ht_searches = 0;
+ pb_ptr->total_failed_ht_search_depth = 0;
+ pb_ptr->max_index_len = 0;
+ pb_ptr->max_index_size = 0;
+ pb_ptr->max_rd_pages = 0;
+ pb_ptr->max_md_pages = 0;
+ pb_ptr->max_mpmde_count = 0;
+ pb_ptr->lru_tl_skips = 0;
+ pb_ptr->lru_dwl_skips = 0;
+ pb_ptr->max_tl_len = 0;
+ pb_ptr->max_tl_size = 0;
+ pb_ptr->delayed_writes = 0;
+ pb_ptr->total_delay = 0;
+ pb_ptr->max_dwl_len = 0;
+ pb_ptr->max_dwl_size = 0;
+ pb_ptr->total_dwl_ins_depth = 0;
FUNC_LEAVE_NOAPI(SUCCEED)
+
} /* H5PB_reset_stats() */
@@ -208,89 +227,264 @@ H5PB_reset_stats(H5PB_t *page_buf)
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_get_stats(const H5PB_t *page_buf, unsigned accesses[2], unsigned hits[2],
+H5PB_get_stats(const H5PB_t *pb_ptr, unsigned accesses[2], unsigned hits[2],
unsigned misses[2], unsigned evictions[2], unsigned bypasses[2])
{
FUNC_ENTER_NOAPI_NOERR
/* Sanity checks */
- HDassert(page_buf);
-
- accesses[0] = page_buf->accesses[0];
- accesses[1] = page_buf->accesses[1];
- hits[0] = page_buf->hits[0];
- hits[1] = page_buf->hits[1];
- misses[0] = page_buf->misses[0];
- misses[1] = page_buf->misses[1];
- evictions[0] = page_buf->evictions[0];
- evictions[1] = page_buf->evictions[1];
- bypasses[0] = page_buf->bypasses[0];
- bypasses[1] = page_buf->bypasses[1];
+ HDassert(pb_ptr);
+
+ accesses[0] = (unsigned)pb_ptr->accesses[0];
+ accesses[1] = (unsigned)pb_ptr->accesses[1];
+ accesses[2] = (unsigned)pb_ptr->accesses[2];
+ hits[0] = (unsigned)pb_ptr->hits[0];
+ hits[1] = (unsigned)pb_ptr->hits[1];
+ hits[2] = (unsigned)pb_ptr->hits[2];
+ misses[0] = (unsigned)pb_ptr->misses[0];
+ misses[1] = (unsigned)pb_ptr->misses[1];
+ misses[2] = (unsigned)pb_ptr->misses[2];
+ evictions[0] = (unsigned)pb_ptr->evictions[0];
+ evictions[1] = (unsigned)pb_ptr->evictions[1];
+ evictions[2] = (unsigned)pb_ptr->evictions[2];
+ bypasses[0] = (unsigned)pb_ptr->bypasses[0];
+ bypasses[1] = (unsigned)pb_ptr->bypasses[1];
+ bypasses[2] = (unsigned)pb_ptr->bypasses[2];
FUNC_LEAVE_NOAPI(SUCCEED)
} /* H5PB_get_stats */
/*-------------------------------------------------------------------------
+ *
* Function: H5PB_print_stats()
*
- * Purpose: This function was created without documentation.
- * What follows is my best understanding of Mohamad's intent.
+ * Purpose: Print out statistics collected for the page buffer layer.
*
- * Print out statistics collected for the page buffer layer.
+ * Return: Non-negative on success/Negative on failure
*
- * Return: Non-negative on success/Negative on failure
+ * Programmer: John Mainzer -- 10/12/18
*
- * Programmer: Mohamad Chaarawi
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_print_stats(const H5PB_t *page_buf)
+H5PB_print_stats(const H5PB_t *pb_ptr)
{
+ double ave_succ_search_depth = 0.0L;
+ double ave_failed_search_depth = 0.0L;
+ double ave_delayed_write = 0.0L;
+ double ave_delayed_write_ins_depth = 0.0L;
+
FUNC_ENTER_NOAPI_NOINIT_NOERR
- HDassert(page_buf);
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ HDfprintf(stdout, "\n\nPage Buffer Statistics (raw/meta/mpmde): \n\n");
+
+ HDfprintf(stdout, "bypasses = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->bypasses[0] + pb_ptr->bypasses[1] + pb_ptr->bypasses[2]),
+ pb_ptr->bypasses[0], pb_ptr->bypasses[1], pb_ptr->bypasses[2]);
+
+ HDfprintf(stdout, "acesses = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->accesses[0] + pb_ptr->accesses[1] + pb_ptr->accesses[2]),
+ pb_ptr->accesses[0], pb_ptr->accesses[1], pb_ptr->accesses[2]);
+
+ HDfprintf(stdout, "hits = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->hits[0] + pb_ptr->hits[1] + pb_ptr->hits[2]),
+ pb_ptr->hits[0], pb_ptr->hits[1], pb_ptr->hits[2]);
+
+ HDfprintf(stdout, "misses = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->misses[0] + pb_ptr->misses[1] + pb_ptr->misses[2]),
+ pb_ptr->misses[0], pb_ptr->misses[1], pb_ptr->misses[2]);
+
+ HDfprintf(stdout, "loads = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->loads[0] + pb_ptr->loads[1] + pb_ptr->loads[2]),
+ pb_ptr->loads[0], pb_ptr->loads[1], pb_ptr->loads[2]);
+
+ HDfprintf(stdout, "insertions = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->insertions[0] + pb_ptr->insertions[1] +
+ pb_ptr->insertions[2]),
+ pb_ptr->insertions[0], pb_ptr->insertions[1],
+ pb_ptr->insertions[2]);
+
+ HDfprintf(stdout, "flushes = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->flushes[0] + pb_ptr->flushes[1] + pb_ptr->flushes[2]),
+ pb_ptr->flushes[0], pb_ptr->flushes[1], pb_ptr->flushes[2]);
+
+ HDfprintf(stdout, "evictions = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->evictions[0] + pb_ptr->evictions[1] +
+ pb_ptr->evictions[2]),
+ pb_ptr->evictions[0], pb_ptr->evictions[1], pb_ptr->evictions[2]);
+
+ HDfprintf(stdout, "clears = %lld (%lld/%lld/%lld)\n",
+ (pb_ptr->clears[0] + pb_ptr->clears[1] + pb_ptr->clears[2]),
+ pb_ptr->clears[0], pb_ptr->clears[1], pb_ptr->clears[2]);
+
+ HDfprintf(stdout, "max LRU len / size = %lld / %lld\n",
+ pb_ptr->max_lru_len, pb_ptr->max_lru_size);
+
+ HDfprintf(stdout,
+ "LRU make space md/rd/tl/dwl skips = %lld/%lld/%lld/%lld\n",
+ pb_ptr->lru_md_skips, pb_ptr->lru_rd_skips,
+ pb_ptr->lru_tl_skips, pb_ptr->lru_dwl_skips);
+
+ HDfprintf(stdout, "hash table insertions / deletions = %lld / %lld\n",
+ pb_ptr->total_ht_insertions, pb_ptr->total_ht_deletions);
+
+ if ( pb_ptr->successful_ht_searches > 0 ) {
+
+ ave_succ_search_depth =
+ (double)(pb_ptr->total_successful_ht_search_depth) /
+ (double)(pb_ptr->successful_ht_searches);
+ }
+ HDfprintf(stdout, "successful ht searches / ave depth = %lld / %llf\n",
+ pb_ptr->successful_ht_searches, ave_succ_search_depth);
+
+ if ( pb_ptr->failed_ht_searches > 0 ) {
- printf("PAGE BUFFER STATISTICS:\n");
+ ave_failed_search_depth =
+ (double)(pb_ptr->total_failed_ht_search_depth) /
+ (double)(pb_ptr->failed_ht_searches);
+ }
+ HDfprintf(stdout, "failed ht searches / ave depth = %lld / %llf\n",
+ pb_ptr->failed_ht_searches, ave_failed_search_depth);
- HDprintf("******* METADATA\n");
- HDprintf("\t Total Accesses: %u\n", page_buf->accesses[0]);
- HDprintf("\t Hits: %u\n", page_buf->hits[0]);
- HDprintf("\t Misses: %u\n", page_buf->misses[0]);
- HDprintf("\t Evictions: %u\n", page_buf->evictions[0]);
- HDprintf("\t Bypasses: %u\n", page_buf->bypasses[0]);
- HDprintf("\t Hit Rate = %f%%\n", ((double)page_buf->hits[0]/(page_buf->accesses[0] - page_buf->bypasses[0]))*100);
- HDprintf("*****************\n\n");
+ HDfprintf(stdout, "max index length / size = %lld / %lld\n",
+ pb_ptr->max_index_len, pb_ptr->max_index_size);
- HDprintf("******* RAWDATA\n");
- HDprintf("\t Total Accesses: %u\n", page_buf->accesses[1]);
- HDprintf("\t Hits: %u\n", page_buf->hits[1]);
- HDprintf("\t Misses: %u\n", page_buf->misses[1]);
- HDprintf("\t Evictions: %u\n", page_buf->evictions[1]);
- HDprintf("\t Bypasses: %u\n", page_buf->bypasses[1]);
- HDprintf("\t Hit Rate = %f%%\n", ((double)page_buf->hits[1]/(page_buf->accesses[1]-page_buf->bypasses[0]))*100);
- HDprintf("*****************\n\n");
+ HDfprintf(stdout, "max rd / md / mpmde entries = %lld / %lld / %lld\n",
+ pb_ptr->max_rd_pages, pb_ptr->max_md_pages,
+ pb_ptr->max_mpmde_count);
+
+ HDfprintf(stdout, "tick list max len / size = %lld / %lld\n",
+ pb_ptr->max_tl_len, pb_ptr->max_tl_size);
+
+ HDfprintf(stdout, "delayed write list max len / size = %lld / %lld\n",
+ pb_ptr->max_dwl_len, pb_ptr->max_dwl_size);
+
+ if ( pb_ptr->delayed_writes > 0 ) {
+
+ ave_delayed_write = (double)(pb_ptr->total_delay) /
+ (double)(pb_ptr->delayed_writes);
+ ave_delayed_write_ins_depth = (double)(pb_ptr->total_dwl_ins_depth) /
+ (double)(pb_ptr->delayed_writes);
+ }
+ HDfprintf(stdout,
+ "delayed writes / ave delay / ave ins depth = %lld / %llf / %llf\n",
+ pb_ptr->delayed_writes, ave_delayed_write, ave_delayed_write_ins_depth);
FUNC_LEAVE_NOAPI(SUCCEED)
+
} /* H5PB_print_stats */
/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB_add_new_page
+ *
+ * Purpose: Insert a new blank page to the page buffer if the page
+ * buffer is configured to allow pages of the specified
+ * type.
+ *
+ * This function is called from the MF layer when a new
+ * page is allocated, to indicate to the page buffer layer
+ * that a read of the page from the file is not necessary
+ * since it is an empty page.
+ *
+ * Note that this function inserts the new page without
+ * attempting to make space. This can result in the page
+ * buffer exceeding its maximum size.
+ *
+ * Note also that it is possible that the page (marked clean)
+ * will be evicted before its first use.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/12/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PB_add_new_page(H5F_t *f, H5FD_mem_t type, haddr_t page_addr)
+{
+ hbool_t can_insert = TRUE;
+ H5PB_t *pb_ptr = NULL;
+ H5PB_entry_t *entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Sanity checks */
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
+
+ pb_ptr = f->shared->pb_ptr;
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ if ( H5FD_MEM_DRAW == type ) { /* raw data page insertion */
+
+ if ( pb_ptr->min_md_pages == pb_ptr->max_pages ) {
+
+ can_insert = FALSE;
+
+ }
+ } else { /* metadata page insertion */
+
+ if ( pb_ptr->min_rd_pages == pb_ptr->max_pages ) {
+
+ can_insert = FALSE;
+ }
+ }
+
+ if ( can_insert ) {
+
+ if ( H5PB__create_new_page(pb_ptr, page_addr,
+ (size_t)(pb_ptr->page_size),
+ type, TRUE, &entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "new page buffer page creation failed.")
+
+ /* updates stats */
+ H5PB__UPDATE_STATS_FOR_INSERTION(pb_ptr, entry_ptr);
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB_add_new_page */
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5PB_create
*
- * Purpose: Create and setup the PB on the file.
+ * Purpose: Setup a page buffer for the supplied file.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_create(H5F_t *f, size_t size, unsigned page_buf_min_meta_perc, unsigned page_buf_min_raw_perc)
+H5PB_create(H5F_t *f, size_t size, unsigned page_buf_min_meta_perc,
+ unsigned page_buf_min_raw_perc)
{
- H5PB_t *page_buf = NULL;
+ int i;
+ int32_t min_md_pages;
+ int32_t min_rd_pages;
+ H5PB_t *pb_ptr = NULL;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
@@ -298,1221 +492,3134 @@ H5PB_create(H5F_t *f, size_t size, unsigned page_buf_min_meta_perc, unsigned pag
/* Sanity checks */
HDassert(f);
HDassert(f->shared);
+ HDassert(page_buf_min_meta_perc <= 100);
+ HDassert(page_buf_min_raw_perc <= 100);
+ HDassert((page_buf_min_meta_perc + page_buf_min_raw_perc) <= 100);
/* Check args */
- if(f->shared->fs_strategy != H5F_FSPACE_STRATEGY_PAGE)
- HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "Enabling Page Buffering requires PAGE file space strategy")
- /* round down the size if it is larger than the page size */
- else if(size > f->shared->fs_page_size) {
+ if ( f->shared->fs_strategy != H5F_FSPACE_STRATEGY_PAGE )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, \
+ "Enabling Page Buffering requires PAGE file space strategy")
+
+ else if ( size > f->shared->fs_page_size ) {
+
+ /* round size down to the next multiple of fs_page_size */
+
hsize_t temp_size;
temp_size = (size / f->shared->fs_page_size) * f->shared->fs_page_size;
+
H5_CHECKED_ASSIGN(size, size_t, temp_size, hsize_t);
- } /* end if */
- else if(0 != size % f->shared->fs_page_size)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINIT, FAIL, "Page Buffer size must be >= to the page size")
- /* Allocate the new page buffering structure */
- if(NULL == (page_buf = H5FL_CALLOC(H5PB_t)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
+ } /* end if */
+ else if ( 0 != size % f->shared->fs_page_size )
- page_buf->max_size = size;
- H5_CHECKED_ASSIGN(page_buf->page_size, size_t, f->shared->fs_page_size, hsize_t);
- page_buf->min_meta_perc = page_buf_min_meta_perc;
- page_buf->min_raw_perc = page_buf_min_raw_perc;
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINIT, FAIL, \
+ "Page Buffer size must be >= to the page size")
/* Calculate the minimum page count for metadata and raw data
* based on the fractions provided
*/
- page_buf->min_meta_count = (unsigned)((size * page_buf_min_meta_perc) / (f->shared->fs_page_size * 100));
- page_buf->min_raw_count = (unsigned)((size * page_buf_min_raw_perc) / (f->shared->fs_page_size * 100));
+ min_md_pages = (int32_t)((size * page_buf_min_meta_perc) /
+ (f->shared->fs_page_size * 100));
+ min_rd_pages = (int32_t)((size * page_buf_min_raw_perc) /
+ (f->shared->fs_page_size * 100));
+ HDassert(min_md_pages >= 0);
+ HDassert(min_rd_pages >= 0);
+ HDassert((min_md_pages + min_rd_pages) <=
+ (int32_t)(size / f->shared->fs_page_size));
+
+
+ /* Allocate the new page buffering structure */
+ if(NULL == (pb_ptr = H5FL_MALLOC(H5PB_t)))
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
+
+ /* initialize the new instance of H5PB_t */
+
+ pb_ptr->magic = H5PB__H5PB_T_MAGIC;
+ pb_ptr->page_size = f->shared->fs_page_size;
+ H5_CHECKED_ASSIGN(pb_ptr->page_size, size_t, \
+ f->shared->fs_page_size, hsize_t);
+ pb_ptr->max_pages = (int32_t)(size / f->shared->fs_page_size);
+ pb_ptr->curr_pages = 0;
+ pb_ptr->curr_md_pages = 0;
+ pb_ptr->curr_rd_pages = 0;
+ pb_ptr->min_md_pages = min_md_pages;
+ pb_ptr->min_rd_pages = min_rd_pages;
+
+ pb_ptr->max_size = size;
+ pb_ptr->min_meta_perc = page_buf_min_meta_perc;
+ pb_ptr->min_raw_perc = page_buf_min_raw_perc;
+
+ /* index */
+ for ( i = 0; i < H5PB__HASH_TABLE_LEN; i++ )
+ pb_ptr->ht[i] = NULL;
+ pb_ptr->index_len = 0;
+ pb_ptr->index_size = 0;
+
+ /* LRU */
+ pb_ptr->LRU_len = 0;
+ pb_ptr->LRU_size = 0;
+ pb_ptr->LRU_head_ptr = NULL;
+ pb_ptr->LRU_tail_ptr = NULL;
+
+
+ /* VFD SWMR specific fields.
+ * The following fields are defined iff vfd_swmr_writer is TRUE.
+ */
+ pb_ptr->vfd_swmr_writer = FALSE;
+ pb_ptr->mpmde_count = 0;
+ pb_ptr->cur_tick = 0;
- if(NULL == (page_buf->slist_ptr = H5SL_create(H5SL_TYPE_HADDR, NULL)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCREATE, FAIL, "can't create skip list")
- if(NULL == (page_buf->mf_slist_ptr = H5SL_create(H5SL_TYPE_HADDR, NULL)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCREATE, FAIL, "can't create skip list")
+ /* delayed write list */
+ pb_ptr->max_delay = 0;
+ pb_ptr->dwl_len = 0;
+ pb_ptr->dwl_size = 0;
+ pb_ptr->dwl_head_ptr = NULL;
+ pb_ptr->dwl_tail_ptr = NULL;
- if(NULL == (page_buf->page_fac = H5FL_fac_init(page_buf->page_size)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINIT, FAIL, "can't create page factory")
+ /* tick list */
+ pb_ptr->tl_len = 0;
+ pb_ptr->tl_size = 0;
+ pb_ptr->tl_head_ptr = NULL;
+ pb_ptr->tl_tail_ptr = NULL;
- f->shared->page_buf = page_buf;
+ H5PB_reset_stats(pb_ptr);
+
+ f->shared->pb_ptr = pb_ptr;
done:
- if(ret_value < 0) {
- if(page_buf != NULL) {
- if(page_buf->slist_ptr != NULL)
- H5SL_close(page_buf->slist_ptr);
- if(page_buf->mf_slist_ptr != NULL)
- H5SL_close(page_buf->mf_slist_ptr);
- if(page_buf->page_fac != NULL)
- H5FL_fac_term(page_buf->page_fac);
- page_buf = H5FL_FREE(H5PB_t, page_buf);
- } /* end if */
- } /* end if */
+
+ if ( ret_value < 0 ) {
+
+ if ( pb_ptr != NULL ) {
+
+ pb_ptr = H5FL_FREE(H5PB_t, pb_ptr);
+
+ }
+ }
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5PB_create */
/*-------------------------------------------------------------------------
- * Function: H5PB__flush_cb
*
- * Purpose: Callback to flush PB skiplist entries.
+ * Function: H5PB_dest
+ *
+ * Purpose: Flush (if necessary) and evict all entries in the page
+ * buffer, and then discard the page buffer.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/22/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
-static herr_t
-H5PB__flush_cb(void *item, void H5_ATTR_UNUSED *key, void *_op_data)
+herr_t
+H5PB_dest(H5F_t *f)
{
- H5PB_entry_t *page_entry = (H5PB_entry_t *)item; /* Pointer to page entry node */
- H5F_t *f = (H5F_t *)_op_data;
+ int i;
+ H5PB_t *pb_ptr = NULL;
+ H5PB_entry_t *entry_ptr = NULL;
+ H5PB_entry_t *evict_ptr = NULL;
herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_STATIC
+ FUNC_ENTER_NOAPI(FAIL)
- /* Sanity checks */
- HDassert(page_entry);
+ /* Sanity check */
HDassert(f);
+ HDassert(f->shared);
- /* Flush the page if it's dirty */
- if(page_entry->is_dirty)
- if(H5PB__write_entry(f, page_entry) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
+ /* flush and destroy the page buffer, if it exists */
+ if ( f->shared->pb_ptr ) {
+
+ pb_ptr = f->shared->pb_ptr;
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ /* the current implementation is very inefficient, and will
+ * fail if there are any outstanding delayed writes -- must fix this
+ */
+ for ( i = 0; i < H5PB__HASH_TABLE_LEN; i++ ) {
+
+ entry_ptr = pb_ptr->ht[i];
+
+ while ( entry_ptr ) {
+
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+
+ evict_ptr = entry_ptr;
+ entry_ptr = entry_ptr->ht_next;
+
+ if ( evict_ptr->is_dirty ) {
+
+ if ( H5PB__flush_entry(f, pb_ptr, evict_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "Can't flush entry")
+ }
+
+ if ( H5PB__evict_entry(pb_ptr, evict_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "forced eviction failed")
+
+ entry_ptr = pb_ptr->ht[i];
+ }
+ }
+
+ /* regular operations fields */
+ HDassert(pb_ptr->curr_pages == 0);
+ HDassert(pb_ptr->curr_md_pages == 0);
+ HDassert(pb_ptr->curr_rd_pages == 0);
+ HDassert(pb_ptr->index_len == 0);
+ HDassert(pb_ptr->index_size == 0);
+ HDassert(pb_ptr->LRU_len == 0);
+ HDassert(pb_ptr->LRU_size == 0);
+ HDassert(pb_ptr->LRU_head_ptr == NULL);
+ HDassert(pb_ptr->LRU_tail_ptr == NULL);
+
+ /* VFD SWMR fields */
+ HDassert(pb_ptr->dwl_len == 0);
+ HDassert(pb_ptr->dwl_size == 0);
+ HDassert(pb_ptr->dwl_head_ptr == NULL);
+ HDassert(pb_ptr->dwl_tail_ptr == NULL);
+
+ HDassert(pb_ptr->tl_len == 0);
+ HDassert(pb_ptr->tl_size == 0);
+ HDassert(pb_ptr->tl_head_ptr == NULL);
+ HDassert(pb_ptr->tl_tail_ptr == NULL);
+
+ pb_ptr->magic = 0;
+ f->shared->pb_ptr = H5FL_FREE(H5PB_t, pb_ptr);
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5PB__flush_cb() */
+
+} /* H5PB_dest */
/*-------------------------------------------------------------------------
+ *
* Function: H5PB_flush
*
- * Purpose: Flush/Free all the PB entries to the file.
+ * Purpose: If the page buffer is defined, flush all entries.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/22/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
H5PB_flush(H5F_t *f)
{
+ int i;
+ H5PB_t *pb_ptr = NULL;
+ H5PB_entry_t *entry_ptr = NULL;
+ H5PB_entry_t *flush_ptr = NULL;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
/* Sanity check */
HDassert(f);
+ HDassert(f->shared);
- /* Flush all the entries in the PB skiplist, if we have write access on the file */
- if(f->shared->page_buf && (H5F_ACC_RDWR & H5F_INTENT(f))) {
- H5PB_t *page_buf = f->shared->page_buf;
+ pb_ptr = f->shared->pb_ptr;
- /* Iterate over all entries in page buffer skip list */
- if(H5SL_iterate(page_buf->slist_ptr, H5PB__flush_cb, (void *)f))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_BADITER, FAIL, "can't flush page buffer skip list")
- } /* end if */
+ if ( pb_ptr ) {
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ /* the current implementation is very inefficient, and will
+ * fail if there are any delayed writes -- must fix this
+ */
+ for ( i = 0; i < H5PB__HASH_TABLE_LEN; i++ ) {
+
+ entry_ptr = pb_ptr->ht[i];
+
+ while ( entry_ptr ) {
+
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+
+ flush_ptr = entry_ptr;
+ entry_ptr = entry_ptr->ht_next;
+
+ if ( flush_ptr->is_dirty ) {
+
+ if ( H5PB__flush_entry(f, pb_ptr, flush_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "Can't flush entry")
+ }
+ }
+ }
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5PB_flush */
/*-------------------------------------------------------------------------
- * Function: H5PB__dest_cb
*
- * Purpose: Callback to free PB skiplist entries.
+ * Function: H5PB_page_exists
+ *
+ * Purpose: Test to see if a page buffer page exists at the specified
+ * address. Set *page_exists_ptr to TRUE or FALSE accordingly.
+ *
+ * This function exists for the convenience of the test
+ * code
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/22/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
-static herr_t
-H5PB__dest_cb(void *item, void H5_ATTR_UNUSED *key, void *_op_data)
+herr_t
+H5PB_page_exists(H5F_t *f, haddr_t addr, hbool_t *page_exists_ptr)
{
- H5PB_entry_t *page_entry = (H5PB_entry_t *)item; /* Pointer to page entry node */
- H5PB_ud1_t *op_data = (H5PB_ud1_t *)_op_data;
+ uint64_t page;
+ H5PB_t *pb_ptr = NULL;
+ H5PB_entry_t *entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
- FUNC_ENTER_STATIC_NOERR
+ /* Sanity check */
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
- /* Sanity checking */
- HDassert(page_entry);
- HDassert(op_data);
- HDassert(op_data->page_buf);
+ pb_ptr = f->shared->pb_ptr;
- /* Remove entry from LRU list */
- if(op_data->actual_slist) {
- H5PB__REMOVE_LRU(op_data->page_buf, page_entry)
- page_entry->page_buf_ptr = H5FL_FAC_FREE(op_data->page_buf->page_fac, page_entry->page_buf_ptr);
- } /* end if */
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(page_exists_ptr);
- /* Free page entry */
- page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
+ /* Calculate the page offset */
+ page = (addr / pb_ptr->page_size);
- FUNC_LEAVE_NOAPI(SUCCEED)
-} /* H5PB__dest_cb() */
+ /* the supplied address should be page aligned */
+ HDassert(addr == page * pb_ptr->page_size);
+
+ /* Search for page in the hash table */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ HDassert((NULL == entry_ptr) || (entry_ptr->addr == addr));
+
+ *page_exists_ptr = ( entry_ptr != NULL );
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB_page_exists */
/*-------------------------------------------------------------------------
- * Function: H5PB_dest
*
- * Purpose: Flush and destroy the PB on the file if it exists.
+ * Function: H5PB_read
+ *
+ * Purpose: Satisfy the read from the page buffer if possible.
+ *
+ * 1) If the page buffer is disabled, simply read from the
+ * HDF5 file and return.
+ *
+ * 2) If the read is for raw data, and the page buffer is
+ * configured for metadata only (i.e. min_md_pages ==
+ * max_pages), simply read from the HDF5 file and return.
+ *
+ * 3) If the read is for raw data, and it is of page size or
+ * larger, read it directly from the HDF5 file.
+ *
+ * It is possible that the page buffer contains dirty pages
+ * that intersect with the read -- test for this and update
+ * the read buffer from the page buffer if any such pages
+ * exist.
+ *
+ * Note that no pages are inserted into the page buffer in
+ * this case.
+ *
+ * 4) If the read is for raw data, and it is of size less
+ * than the page size, satisfy the read from the page
+ * buffer, loading and inserting pages into the
+ * page buffer as necessary
+ *
+ * 5) If the read is for metadata, and the page buffer is
+ * configured for raw data only (i.e. min_rd_pages ==
+ * max_pages), simply read from the HDF5 file and return.
+ *
+ * The free space manager guarantees that allocations larger
+ * than one page will be page aligned, and that allocations
+ * of size less than or equal to page size will not cross page
+ * boundaries. Further, unlike raw data, metadata is always
+ * written and read atomically.
+ *
+ * In principle, this should make it easy to discriminate
+ * between small and multi-page metadata entries so that
+ * pages containing the former will be buffered and the
+ * latter be read directly from file.
+ *
+ * Unfortunately, the metadata cache does not always know the
+ * size of metadata entries when it tries to read them. In
+ * such cases, it issues speculative reads that may be either
+ * smaller or larger than the actual size of the piece of
+ * metadata that is finally read.
+ *
+ * Since we are guaranteed that all metadata allocations larger
+ * than one page are page aligned, we can safely clip at the
+ * page boundary any non page aligned metadata read that crosses
+ * page boundaries.
+ *
+ * However, page aligned reads could wind up being either
+ * small or multi-page. This results in two scenarios that
+ * we must handle:
+ *
+ * a) A page aligned read of size less than one page
+ * turns out to be multi-page.
+ *
+ * In this case, the initial speculative read will
+ * result in a page load and insertion into the page
+ * buffer. This page must be evicted on the subsequent
+ * read of size greater than page size.
+ *
+ * In the context of VFD SWMR, it is also possible that
+ * the multi-page metadata entry is already in the
+ * page buffer -- in which case the initial read should
+ * be satisfied from the multi-page page buffer entry.
+ *
+ * b) A page aligned, larger than one page read turns out
+ * to be small (less than one page).
+ *
+ * If there is already a page in the page buffer with
+ * same address, we can safely clip the original
+ * read to page size
+ *
+ * The above considerations resolve into the following cases:
+ *
+ * 6) If the read is for metadata and not page aligned, clip
+ * the read to the end of the current page if necessary.
+ * Load the relevant page if necessary and satisfy the
+ * read from the page buffer. Note that if there is an
+ * existing page, it must not be a multi-page metadata
+ * entry. If it is, flag an error.
+ *
+ * 7) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is no entry in the page buffer,
+ * satisfy the read from the file
+ *
+ * 8) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a regular entry at the target
+ * page address, test to see if the last read was for the
+ * same address.
+ *
+ * If it was, evict the page, and satisfy the read from file.
+ * Flag an error if the page was dirty.
+ *
+ * If the last read was for a different page, clip the read
+ * to one page, and satisfy the read from the existing
+ * regular entry.
+ *
+ * 9) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a multi-page metadata entry
+ * at the target page address, test to see if
+ * pb_ptr->vfd_swmr_writer is TRUE.
+ *
+ * If it is, satisfy the read from the multi-page metadata
+ * entry, clipping the read if necessary.
+ *
+ * If pb_ptr->vfd_swmr_writer is FALSE, flag an error.
+ *
+ * 10) If the read is for metadata, is page aligned, is no
+ * larger than a page, test to see if the page buffer
+ * contains a page at the target address.
+ *
+ * If it doesn't, load the page and satisfy the read
+ * from it.
+ *
+ * If it contains a regular page entry, satisfy the read
+ * from it.
+ *
+ * If it contains a multipage metadata entry at the target
+ * address, satisfy the read from the multi-page metadata
+ * entry if pb_ptr->vfd_swmr_writer is TRUE, and flag an
+ * error otherwise.
+ *
+ * Observe that this function handles cases 1, 2, and 5
+ * directly, calls H5PB__read_raw() for cases 3 & 4, and
+ * calls H5PB__read_meta() for cases 6, 7, 8, 9, and 10.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_dest(H5F_t *f)
+H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ void *buf/*out*/)
{
- herr_t ret_value = SUCCEED; /* Return value */
+ H5PB_t *pb_ptr; /* Page buffer for this file */
+ hbool_t bypass_pb = FALSE; /* Whether to bypass page buffering */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
/* Sanity checks */
HDassert(f);
+ HDassert(f->shared);
+ HDassert(type != H5FD_MEM_GHEAP);
- /* flush and destroy the page buffer, if it exists */
- if(f->shared->page_buf) {
- H5PB_t *page_buf = f->shared->page_buf;
- H5PB_ud1_t op_data; /* Iteration context */
+ pb_ptr = f->shared->pb_ptr;
+
+ if ( pb_ptr == NULL ) {
+
+ bypass_pb = TRUE; /* case 1) -- page buffer is disabled */
+
+ } else {
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ if ( H5FD_MEM_DRAW == type ) { /* raw data read */
+
+ if ( pb_ptr->min_md_pages == pb_ptr->max_pages ) {
- if(H5PB_flush(f) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTFLUSH, FAIL, "can't flush page buffer")
+ /* case 2) -- page buffer configured for metadata only */
+ bypass_pb = TRUE;
- /* Set up context info */
- op_data.page_buf = page_buf;
+ }
+ } else { /* metadata read */
- /* Destroy the skip list containing all the entries in the PB */
- op_data.actual_slist = TRUE;
- if(H5SL_destroy(page_buf->slist_ptr, H5PB__dest_cb, &op_data))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCLOSEOBJ, FAIL, "can't destroy page buffer skip list")
+ if ( pb_ptr->min_rd_pages == pb_ptr->max_pages ) {
- /* Destroy the skip list containing the new entries */
- op_data.actual_slist = FALSE;
- if(H5SL_destroy(page_buf->mf_slist_ptr, H5PB__dest_cb, &op_data))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTCLOSEOBJ, FAIL, "can't destroy page buffer skip list")
+ /* case 5) -- page buffer configured for raw data only */
+ bypass_pb = TRUE;
+ }
+ }
+ }
- /* Destroy the page factory */
- if(H5FL_fac_term(page_buf->page_fac) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTRELEASE, FAIL, "can't destroy page buffer page factory")
+#ifdef H5_HAVE_PARALLEL
+ /* at present, the page buffer must be disabled in the parallel case.
+ * However, just in case ...
+ */
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+
+ bypass_pb = TRUE;
- f->shared->page_buf = H5FL_FREE(H5PB_t, page_buf);
} /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
+ if ( bypass_pb ) { /* cases 1, 2. and 5 */
+
+#if VFD_IO
+ if ( H5FD_read(f->shared->lf, type, addr, size, buf) < 0 )
+#else /* VFD_IO */
+ if ( H5F__accum_read(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "read through metadata accumulator failed")
+
+ /* Update statistics */
+ if ( pb_ptr ) {
+
+ H5PB__UPDATE_STATS_FOR_BYPASS(pb_ptr, type, size);
+ }
+ } else {
+
+ if ( H5FD_MEM_DRAW == type ) { /* cases 3 and 4 */
+
+ if ( H5PB__read_raw(f, type, addr, size, buf) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "H5PB_read_raw() failed")
+
+ } else { /* cases 6, 7, 8, 9, and 10 */
+
+ if ( H5PB__read_meta(f, type, addr, size, buf) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "H5PB_read_meta() failed")
+ }
+
+ H5PB__UPDATE_STATS_FOR_ACCESS(pb_ptr, type, size);
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5PB_dest */
+
+} /* end H5PB_read() */
/*-------------------------------------------------------------------------
- * Function: H5PB_add_new_page
*
- * Purpose: Add a new page to the new page skip list. This is called
- * from the MF layer when a new page is allocated to
- * indicate to the page buffer layer that a read of the page
- * from the file is not necessary since it's an empty page.
+ * Function: H5PB_remove_entry
*
- * Return: Non-negative on success/Negative on failure
+ * Purpose: Remove possible metadata entry with ADDR from the PB cache.
+ * This is in response to the data corruption bug from fheap.c
+ * with page buffering + page strategy.
+ * Note: Large metadata page bypasses the PB cache.
+ * Note: Update of raw data page (large or small sized) is
+ * handled by the PB cache.
*
- * Programmer: Mohamad Chaarawi
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: Vailin Choi; Feb 2017
+ *
+ * Changes: Reworked function for re-implementation of the page buffer.
+ *
+ * Vailin: I think we need to do this for raw data as well.
+ *
+ * JRM -- 10/23/18
*
*-------------------------------------------------------------------------
*/
-herr_t
-H5PB_add_new_page(H5F_t *f, H5FD_mem_t type, haddr_t page_addr)
+herr_t
+H5PB_remove_entry(const H5F_t *f, haddr_t addr)
{
- H5PB_t *page_buf = f->shared->page_buf;
- H5PB_entry_t *page_entry = NULL; /* pointer to the corresponding page entry */
- herr_t ret_value = SUCCEED; /* Return value */
+ uint64_t page;
+ H5PB_t *pb_ptr = NULL;
+ H5PB_entry_t *entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
/* Sanity checks */
- HDassert(page_buf);
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
- /* If there is an existing page, this means that at some point the
- * file free space manager freed and re-allocated a page at the same
- * address. No need to do anything here then...
- */
- /* MSC - to be safe, might want to dig in the MF layer and remove
- * the page when it is freed from this list if it still exists and
- * remove this check
- */
- if(NULL == H5SL_search(page_buf->mf_slist_ptr, &(page_addr))) {
- /* Create the new PB entry */
- if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
-
- /* Initialize page fields */
- page_entry->addr = page_addr;
- page_entry->type = (H5F_mem_page_t)type;
- page_entry->is_dirty = FALSE;
-
- /* Insert entry in skip list */
- if(H5SL_insert(page_buf->mf_slist_ptr, page_entry, &(page_entry->addr)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "Can't insert entry in skip list")
- } /* end if */
+ pb_ptr = f->shared->pb_ptr;
+
+ /* Calculate the page offset */
+ page = (addr / pb_ptr->page_size);
+
+ HDassert(addr == page * pb_ptr->page_size);
+
+ /* Search for page in the hash table */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ if ( entry_ptr ) {
+
+ HDassert(entry_ptr->addr == addr);
+ HDassert(entry_ptr->size == pb_ptr->page_size);
+
+ /* if the entry is dirty, mark it clean before we evict */
+ if ( ( entry_ptr->is_dirty ) &&
+ ( H5PB__mark_entry_clean(pb_ptr, entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry clean failed")
+
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "forced eviction failed")
+
+ }
done:
- if(ret_value < 0)
- if(page_entry)
- page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5PB_add_new_page */
+
+} /* H5PB_remove_entry */
/*-------------------------------------------------------------------------
+ *
* Function: H5PB_update_entry
*
- * Purpose: In PHDF5, entries that are written by other processes and just
- * marked clean by this process have to have their corresponding
- * pages updated if they exist in the page buffer.
- * This routine checks and update the pages.
+ * Purpose: In PHDF5, metadata cache entries that are written by other
+ * processes are simply marked clean in the current process.
+ * However, if the page buffer is enabled, entries marked
+ * clean must still be written to the page buffer so as to
+ * keep the contents of metadata pages consistent on all
+ * processes.
+ *
+ * Do this as follows:
+ *
+ * 1) Test to see if the page buffer is configured to accept
+ * metadata pages. If it isn't, return.
+ *
+ * 2) Test to see if the page buffer contains the page that
+ * contains the supplied metadata cache entry. If it
+ * doesn't, return.
+ *
+ * 3) Write the supplied buffer to page at the appropriate
+ * offset.
+ *
+ * Note that at present, page buffering is disabled in the
+ * parallel case. Thus this function has not been tested.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/23/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_update_entry(H5PB_t *page_buf, haddr_t addr, size_t size, const void *buf)
+H5PB_update_entry(H5PB_t *pb_ptr, haddr_t addr, size_t size, const void *buf)
{
- H5PB_entry_t *page_entry; /* Pointer to the corresponding page entry */
+ uint64_t page;
+ size_t offset;
+ H5PB_entry_t *entry_ptr = NULL;
haddr_t page_addr;
+ herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_NOAPI_NOERR
+ FUNC_ENTER_NOAPI(FAIL)
/* Sanity checks */
- HDassert(page_buf);
- HDassert(size <= page_buf->page_size);
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(size > 0);
+ HDassert(size <= pb_ptr->page_size);
HDassert(buf);
- /* calculate the aligned address of the first page */
- page_addr = (addr / page_buf->page_size) * page_buf->page_size;
+ if ( pb_ptr->min_rd_pages < pb_ptr->max_pages ) {
- /* search for the page and update if found */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&page_addr));
- if(page_entry) {
- haddr_t offset;
+ /* page buffer is configured to accept metadata pages */
- HDassert(addr + size <= page_addr + page_buf->page_size);
- offset = addr - page_addr;
- HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, buf, size);
+ /* Calculate the aligned address of the containing page */
+ page = (addr / pb_ptr->page_size);
+ page_addr = page * pb_ptr->page_size;
- /* move to top of LRU list */
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
- } /* end if */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ if ( entry_ptr ) {
+
+ HDassert( entry_ptr->is_metadata );
+ HDassert( ! (entry_ptr->is_mpmde) );
+ HDassert(addr + size <= page_addr + pb_ptr->page_size);
+
+ offset = addr - page_addr;
+
+ HDmemcpy(((uint8_t *)(entry_ptr->image_ptr) + offset),
+ buf, size);
+
+ /* should we mark the page dirty? If so, replace the following
+ * with a call to H5PB__mark_entry_dirty()
+ */
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
- FUNC_LEAVE_NOAPI(SUCCEED)
} /* H5PB_update_entry */
/*-------------------------------------------------------------------------
- * Function: H5PB_remove_entry
*
- * Purpose: Remove possible metadata entry with ADDR from the PB cache.
- * This is in response to the data corruption bug from fheap.c
- * with page buffering + page strategy.
- * Note: Large metadata page bypasses the PB cache.
- * Note: Update of raw data page (large or small sized) is handled by the PB cache.
+ * Function: H5PB_write
*
- * Return: Non-negative on success/Negative on failure
+ * Purpose: Write data into the Page Buffer if practical, and to file
+ * otherwise. Specifically:
*
- * Programmer: Vailin Choi; Feb 2017
+ * 1) If the page buffer is disabled, simply write to the
+ * HDF5 file and return.
+ *
+ * 2) If the write is raw data, and the page buffer is
+ * configured for metadata only (i.e. min_md_pages ==
+ * max_pages), simply write to the HDF5 file and return.
+ *
+ * 3) If the write is raw data, and it is of page size or
+ * larger, write directly to the HDF5 file.
+ *
+ * It is possible that the write intersects one or more
+ * pages in the page buffer -- test for this and update
+ * any partially written pages, and evict any pages
+ * that are completely overwritten.
+ *
+ * Note that no pages are inserted into the page buffer in
+ * this case.
+ *
+ * 4) If the write is of raw data, and it is of size less
+ * than the page size, write the page into the page
+ * buffer, loading and inserting pages into the
+ * page buffer as necessary
+ *
+ * 5) If the write is of metadata, and the page buffer is
+ * configured for raw data only (i.e. min_rd_pages ==
+ * max_pages), simply write to the HDF5 file and return.
+ *
+ * 6) If the write is of metadata, the write is larger than
+ * one page, and vfd_swmr_writer is FALSE, simply write
+ * to the HDF5 file. There is no need to check the
+ * page buffer, as metadata is always written atomically,
+ * and entries of this size are not buffered in the page
+ * buffer.
+ *
+ * 7) If the write is of metadata, the write is larger than
+ * one page, and vfd_swmr_writer is TRUE, the write must
+ * be buffered in the page buffer until the end of the tick.
+ *
+ * Create a multi-page metadata entry in the page buffer
+ * and copy the write into it. Insert the new entry in
+ * the tick list.
+ *
+ * Test to see if the write of the multi-page metadata
+ * entry must be delayed. If so, place the entry in
+ * the delayed write list. Otherwise, write the multi-page
+ * metadata entry to the HDF5 file.
+ *
+ * 8) If the write is of metadata, and the write is of size
+ * less than or equal to the page size, write the data
+ * into the page buffer, loading and inserting a page
+ * if necessary.
+ *
+ * If, in addition, vfd_swmr_writer is TRUE, add the page
+ * touched by the write to the tick list.
+ *
+ * Observe that this function handles cases 1, 2, 5, and 6
+ * directly, calls H5PB__write_raw() for cases 3 & 4, and
+ * calls H5PB__write_meta() for cases 7 and 8.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
herr_t
-H5PB_remove_entry(const H5F_t *f, haddr_t addr)
+H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ const void *buf)
{
- H5PB_t *page_buf = f->shared->page_buf;
- H5PB_entry_t *page_entry = NULL; /* pointer to the page entry being searched */
- herr_t ret_value = SUCCEED; /* Return value */
+ H5PB_t *pb_ptr; /* Page buffer for this file */
+ hbool_t bypass_pb = FALSE; /* Whether to bypass page buffering */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
/* Sanity checks */
- HDassert(page_buf);
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(type != H5FD_MEM_GHEAP);
+
+ pb_ptr = f->shared->pb_ptr;
+
+ if ( pb_ptr == NULL ) {
+
+ bypass_pb = TRUE; /* case 1) -- page buffer is disabled */
+
+ } else {
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+ if ( H5FD_MEM_DRAW == type ) { /* raw data read */
- /* Search for address in the skip list */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&addr));
+ if ( pb_ptr->min_md_pages == pb_ptr->max_pages ) {
- /* If found, remove the entry from the PB cache */
- if(page_entry) {
- HDassert(page_entry->type != H5F_MEM_PAGE_DRAW);
- if(NULL == H5SL_remove(page_buf->slist_ptr, &(page_entry->addr)))
- HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "Page Entry is not in skip list")
+ /* case 2) -- page buffer configured for metadata only */
+ bypass_pb = TRUE;
- /* Remove from LRU list */
- H5PB__REMOVE_LRU(page_buf, page_entry)
- HDassert(H5SL_count(page_buf->slist_ptr) == page_buf->LRU_list_len);
+ }
+ } else { /* metadata read */
- page_buf->meta_count--;
+ if ( pb_ptr->min_rd_pages == pb_ptr->max_pages ) {
+
+ /* case 5) -- page buffer configured for raw data only */
+ bypass_pb = TRUE;
+
+ } else if ( ( size > pb_ptr->page_size ) &&
+ ( ! ( pb_ptr->vfd_swmr_writer ) ) ) {
+
+ /* case 6) -- md write larger than one page and
+ * pb_ptr->vfd_swmr_writer is FALSE.
+ */
+ bypass_pb = TRUE;
+ }
+ }
+ }
+
+#ifdef H5_HAVE_PARALLEL
+ /* at present, the page buffer must be disabled in the parallel case.
+ * However, just in case ...
+ */
+ if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
+
+ bypass_pb = TRUE;
- page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
- page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
} /* end if */
+#endif /* H5_HAVE_PARALLEL */
+
+ if ( bypass_pb ) { /* cases 1, 2. 5, and 6 */
+
+#if VFD_IO
+ if ( H5FD_write(f->shared->lf, type, addr, size, buf) < 0 )
+#else /* VFD_IO */
+ if ( H5F__accum_write(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "write through metadata accumulator failed")
+
+ /* Update statistics */
+ if ( pb_ptr ) {
+
+ H5PB__UPDATE_STATS_FOR_BYPASS(pb_ptr, type, size);
+ }
+ } else {
+
+ if ( H5FD_MEM_DRAW == type ) { /* cases 3 and 4 */
+
+ if ( H5PB__write_raw(f, type, addr, size, buf) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "H5PB_read_raw() failed")
+
+ } else { /* cases 7, and 8 */
+
+ if ( H5PB__write_meta(f, type, addr, size, buf) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "H5PB_read_meta() failed")
+ }
+
+ H5PB__UPDATE_STATS_FOR_ACCESS(pb_ptr, type, size);
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* H5PB_remove_entry */
+
+} /* end H5PB_write() */
+/**************************************************************************/
+/***************************** STATIC FUNCTIONS ***************************/
+/**************************************************************************/
+
/*-------------------------------------------------------------------------
- * Function: H5PB_read
*
- * Purpose: Reads in the data from the page containing it if it exists
- * in the PB cache; otherwise reads in the page through the VFD.
+ * Function: H5PB__allocate_page
+ *
+ * Purpose: Allocate an instance of H5PB_entry_t and its associated
+ * buffer. The supplied size must be greater than or
+ * equal to pb_ptr->page_size, and equal to that value if
+ * pb_ptr->vfd_swmr_writer is FALSE.
+ *
+ * The associated buffer is zeroed if clean_image is TRUE.
+ *
+ * Return: Pointer to the newly allocated instance of H5PB_entry_t
+ * on success, and NULL on failure.
+ *
+ * Programmer: John Mainzer -- 10/12/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static H5PB_entry_t *
+H5PB__allocate_page(H5PB_t *pb_ptr, size_t size, hbool_t clean_image)
+{
+ H5PB_entry_t *entry_ptr = NULL;
+ void * image_ptr = NULL;
+ H5PB_entry_t *ret_value = NULL; /* Return value */
+
+ FUNC_ENTER_NOAPI(NULL)
+
+ /* sanity checks */
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(size >= pb_ptr->page_size);
+ HDassert((size == pb_ptr->page_size) || (pb_ptr->vfd_swmr_writer));
+
+ /* allocate the entry and its associated image buffer */
+ if ( NULL == (entry_ptr = H5FL_MALLOC(H5PB_entry_t)))
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, NULL, \
+ "memory allocation for H5PB_entry_t failed")
+
+ if ( clean_image ) {
+
+ image_ptr = H5MM_calloc(size);
+
+ } else {
+
+ image_ptr = H5MM_malloc(size);
+ }
+
+ if ( NULL == image_ptr )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, NULL, \
+ "memory allocation for page image failed")
+
+ /* initialize the new page buffer entry */
+ entry_ptr->magic = H5PB__H5PB_ENTRY_T_MAGIC;
+ entry_ptr->pb_ptr = pb_ptr;
+ entry_ptr->addr = HADDR_UNDEF;
+ entry_ptr->page = 0;
+ entry_ptr->size = size;
+ entry_ptr->image_ptr = image_ptr;
+ entry_ptr->mem_type = H5FD_MEM_DEFAULT;
+ entry_ptr->is_metadata = FALSE;
+ entry_ptr->is_mpmde = FALSE;
+ entry_ptr->is_dirty = FALSE;
+
+ /* fields supporting the hash table */
+ entry_ptr->ht_prev = NULL;
+ entry_ptr->ht_next = NULL;
+
+ /* fields supporting replacement policies */
+ entry_ptr->next = NULL;
+ entry_ptr->prev = NULL;
+
+ /* fields supporting VFD SWMR */
+ entry_ptr->is_mpmde = FALSE;
+ entry_ptr->loaded = FALSE;
+ entry_ptr->modified_this_tick = FALSE;
+ entry_ptr->delay_write_until = 0;
+ entry_ptr->tl_next = NULL;
+ entry_ptr->tl_prev = NULL;
+
+ ret_value = entry_ptr;
+
+done:
+
+ if ( NULL == ret_value ) {
+
+ if ( entry_ptr ) {
+
+ entry_ptr->magic = 0;
+ entry_ptr = H5FL_FREE(H5PB_entry_t, entry_ptr);
+ }
+
+ if ( image_ptr ) {
+
+ image_ptr = H5MM_xfree(image_ptr);
+ }
+ } /* end if */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__allocate_page() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__create_new_page
+ *
+ * Purpose: Create a new page and insert it in the page buffer with
+ * the specified address and type. If entry_ptr_ptr is not
+ * NULL, return a pointer to the new entry in *entry_ptr_ptr.
+ *
+ * Throw an error if a page already exists at the specified
+ * address.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/12/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
-herr_t
-H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, void *buf/*out*/)
+herr_t
+H5PB__create_new_page(H5PB_t *pb_ptr, haddr_t addr, size_t size,
+ H5FD_mem_t type, hbool_t clean_image, H5PB_entry_t **entry_ptr_ptr)
{
- H5PB_t *page_buf; /* Page buffering info for this file */
- H5PB_entry_t *page_entry; /* Pointer to the corresponding page entry */
- H5FD_t *file; /* File driver pointer */
- haddr_t first_page_addr, last_page_addr; /* Addresses of the first and last pages covered by I/O */
- haddr_t offset;
- haddr_t search_addr; /* Address of current page */
- hsize_t num_touched_pages; /* Number of pages accessed */
- size_t access_size;
- hbool_t bypass_pb = FALSE; /* Whether to bypass page buffering */
- hsize_t i; /* Local index variable */
- herr_t ret_value = SUCCEED; /* Return value */
+ hbool_t inserted_in_index = FALSE;
+ hbool_t inserted_in_lru = FALSE;
+ uint64_t page;
+ H5PB_entry_t *entry_ptr = NULL;
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
/* Sanity checks */
- HDassert(f);
- HDassert(type != H5FD_MEM_GHEAP);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
- /* Get pointer to page buffer info for this file */
- page_buf = f->shared->page_buf;
+ page = (uint64_t)addr / (uint64_t)(pb_ptr->page_size);
-#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
-#if 1
- bypass_pb = TRUE;
-#else
- /* MSC - why this stopped working ? */
- int mpi_size;
-
- if((mpi_size = H5F_mpi_get_size(f)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "can't retrieve MPI communicator size")
- if(1 != mpi_size)
- bypass_pb = TRUE;
-#endif
- } /* end if */
-#endif
+ HDassert((uint64_t)(addr) == (page * (uint64_t)(pb_ptr->page_size)));
- /* If page buffering is disabled, or the I/O size is larger than that of a
- * single page, or if this is a parallel raw data access, bypass page
- * buffering.
- */
- if(NULL == page_buf || size >= page_buf->page_size ||
- (bypass_pb && H5FD_MEM_DRAW == type)) {
- if(H5F__accum_read(f, type, addr, size, buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "read through metadata accumulator failed")
+ HDassert(size >= pb_ptr->page_size);
+ HDassert((size == pb_ptr->page_size) ||
+ ((pb_ptr->vfd_swmr_writer) && (type != H5FD_MEM_DRAW)));
+ HDassert((NULL == entry_ptr_ptr) || (NULL == *entry_ptr_ptr));
- /* Update statistics */
- if(page_buf) {
- if(type == H5FD_MEM_DRAW)
- page_buf->bypasses[1] ++;
- else
- page_buf->bypasses[0] ++;
- } /* end if */
-
- /* If page buffering is disabled, or if this is a large metadata access,
- * or if this is parallel raw data access, we are done here
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL);
+
+ if ( entry_ptr != NULL ) {
+
+#if 0 /* JRM */
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "page buffer already contains a page at the specified address")
+#else /* JRM */
+ /* this should be an error, but until we update the page allocation
+ * code to tell the page buffer to discard the associated entry
+ * whenever a page is freed, this situation can occur.
+ *
+ * For now, just force the eviction of the existing page.
+ * Delete this code as soon as the paged allocation code is
+ * updated accordingly
*/
- if(NULL == page_buf || (size >= page_buf->page_size && H5FD_MEM_DRAW != type) ||
- (bypass_pb && H5FD_MEM_DRAW == type))
- HGOTO_DONE(SUCCEED)
- } /* end if */
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
- /* Update statistics */
- if(page_buf) {
- if(type == H5FD_MEM_DRAW)
- page_buf->accesses[1]++;
- else
- page_buf->accesses[0]++;
- } /* end if */
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "forced eviction failed")
- /* Calculate the aligned address of the first page */
- first_page_addr = (addr / page_buf->page_size) * page_buf->page_size;
+#endif /* JRM */
+ }
+
+ entry_ptr = H5PB__allocate_page(pb_ptr, size, clean_image);
+
+ if ( NULL == entry_ptr )
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, \
+ "Can't allocate new page buffer entry")
+
+ /* perform additional initialization */
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->pb_ptr == pb_ptr);
+ entry_ptr->addr = addr;
+ entry_ptr->page = page;
+ HDassert(entry_ptr->size == size);
+ HDassert(entry_ptr->image_ptr);
+ entry_ptr->mem_type = type;
+ entry_ptr->is_metadata = (type != H5FD_MEM_DRAW);
+ entry_ptr->is_mpmde = ((entry_ptr->is_metadata) &&
+ (size > pb_ptr->page_size));
+ entry_ptr->is_dirty = FALSE;
+
+ /* insert in the hash table */
+ H5PB__INSERT_IN_INDEX(pb_ptr, entry_ptr, FAIL)
+ inserted_in_index = TRUE;
- /* For Raw data calculate the aligned address of the last page and
- * the number of pages accessed if more than 1 page is accessed
+ /* insert at the head of the LRU */
+ H5PB__UPDATE_RP_FOR_INSERTION(pb_ptr, entry_ptr, FAIL)
+ inserted_in_lru = TRUE;
+
+ /* update stats */
+ H5PB__UPDATE_STATS_FOR_INSERTION(pb_ptr, entry_ptr);
+
+ if ( entry_ptr_ptr ) {
+
+ *entry_ptr_ptr = entry_ptr;
+ }
+
+done:
+
+ if ( ret_value < 0 ) {
+
+ if ( entry_ptr ) {
+
+ if ( inserted_in_lru ) {
+
+ H5PB__UPDATE_RP_FOR_EVICTION(pb_ptr, entry_ptr, FAIL);
+ }
+
+ if ( inserted_in_index ) {
+
+ H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, FAIL)
+ }
+
+ H5PB__deallocate_page(entry_ptr);
+ entry_ptr = NULL;
+ }
+ }
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__create_new_page() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__deallocate_page
+ *
+ * Purpose: Free the supplied instance of H5PB_entry_t and its
+ * associated image buffer. The entry must be clean and
+ * already removed from the page buffer before this function
+ * is called -- the function asserts that the entry is not
+ * dirty and that its ht_next / ht_prev, next / prev, and
+ * tl_next / tl_prev link fields are all NULL.
+ *
+ * The magic field of the entry is zeroed, the image buffer
+ * is released via H5MM_xfree(), and the entry itself is
+ * released via H5FL_FREE(). The caller must not reference
+ * the entry after this call.
+ *
+ * Return: void
+ *
+ * Programmer: John Mainzer -- 10/12/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+H5PB__deallocate_page(H5PB_entry_t *entry_ptr)
+{
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+ /* sanity checks -- the entry must be valid, clean, and unlinked */
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->size > 0);
+ HDassert(entry_ptr->image_ptr);
+ HDassert(!(entry_ptr->is_dirty));
+ HDassert(entry_ptr->ht_next == NULL);
+ HDassert(entry_ptr->ht_prev == NULL);
+ HDassert(entry_ptr->next == NULL);
+ HDassert(entry_ptr->prev == NULL);
+ HDassert(entry_ptr->tl_next == NULL);
+ HDassert(entry_ptr->tl_prev == NULL);
+
+ entry_ptr->magic = 0;
+ entry_ptr->image_ptr = H5MM_xfree(entry_ptr->image_ptr);
+ entry_ptr = H5FL_FREE(H5PB_entry_t, entry_ptr);
+
+ FUNC_LEAVE_NOAPI_VOID
+
+} /* H5PB__deallocate_page() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__evict_entry
+ *
+ * Purpose: Evict the target entry from the page buffer, and
+ * de-allocate its associated image and instance of
+ * H5PB_entry_t.
+ *
+ * In general, entries must be clean before they can be
+ * evicted, and the minimum metadata and raw data limits
+ * must be respected. Attempts to evict an entry that
+ * do not respect these constraints will generate
+ * an error unless the force parameter is TRUE, in which
+ * case these constraints are ignored. When force is TRUE
+ * and the entry is dirty, the entry is first marked clean
+ * via H5PB__mark_entry_clean() and then evicted.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/14/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__evict_entry(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr, hbool_t force)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* sanity checks */
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->size > 0);
+ HDassert(entry_ptr->image_ptr);
+ /* entries on either the tick list or the delayed write
+ * list may not be evicted -- verify this.
*/
- if(H5FD_MEM_DRAW == type) {
- last_page_addr = ((addr + size - 1) / page_buf->page_size) * page_buf->page_size;
-
- /* How many pages does this write span */
- num_touched_pages = (last_page_addr / page_buf->page_size + 1) -
- (first_page_addr / page_buf->page_size);
- if(first_page_addr == last_page_addr) {
- HDassert(1 == num_touched_pages);
- last_page_addr = HADDR_UNDEF;
- } /* end if */
- } /* end if */
- /* Otherwise set last page addr to HADDR_UNDEF */
- else {
- num_touched_pages = 1;
- last_page_addr = HADDR_UNDEF;
- } /* end else */
+ HDassert(!(entry_ptr->modified_this_tick));
+ HDassert(entry_ptr->delay_write_until == 0);
+
+ if ( ( ! force ) && ( entry_ptr->is_dirty ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "Attempt to evict a dirty entry");
+
+ if ( ! force ) {
+
+ /* it is OK to evict a metadata page if pb_ptr->curr_md_pages ==
+ * pb_ptr->min_md_pages - 1 if we are about to replace it with another
+ * metadata page.
+ *
+ * Similarly, it is OK to evict a raw data page if
+ * pb_ptr->curr_rd_pages == pb_ptr->min_rd_pages - 1 if we are
+ * about to replace it with another raw data page.
+ *
+ * Assume sanity checks have been made before this call, and
+ * allow the above without testing the intended replacement.
+ */
+ if ( ( entry_ptr->is_metadata ) &&
+ ( pb_ptr->curr_md_pages < pb_ptr->min_md_pages ) ) {
- /* Translate to file driver I/O info object */
- file = f->shared->lf;
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "Attempt to violate min_md_pages");
+
+ } else if ( ( ! entry_ptr->is_metadata ) &&
+ ( pb_ptr->curr_rd_pages < pb_ptr->min_rd_pages ) ) {
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "Attempt to violate min_rd_pages");
+ }
+ } else if ( ( entry_ptr->is_dirty ) &&
+ ( H5PB__mark_entry_clean(pb_ptr, entry_ptr) < 0 ) ) {
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "mark entry clean failed")
+ }
+
+ /* remove the entry from the LRU */
+ H5PB__UPDATE_RP_FOR_EVICTION(pb_ptr, entry_ptr, FAIL)
+
+ /* remove the entry from the hash table */
+ H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, FAIL)
+
+ /* update stats for eviction */
+ H5PB__UPDATE_STATS_FOR_EVICTION(pb_ptr, entry_ptr)
+
+ /* deallocate the page */
+ H5PB__deallocate_page(entry_ptr);
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__evict_entry() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__flush_entry
+ *
+ * Purpose: Flush the target entry to file.
+ *
+ * Under normal circumstances, the entry will be in the
+ * replacement policy. In this case, also update the
+ * replacement policy for flush.
+ *
+ * If pb_ptr->vfd_swmr_writer, it is possible that the target
+ * is a multi-page metadata entry. In this case, the entry
+ * is not in the replacement policy, and thus the policy
+ * should not be updated.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/14/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__flush_entry(H5F_t *f, H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
+{
+ hbool_t skip_write = FALSE;
+ size_t write_size;
+ haddr_t eoa; /* Current EOA for the file */
+ H5FD_t *file; /* file driver */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* sanity checks */
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->lf);
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->size > 0);
+ HDassert(entry_ptr->size >= pb_ptr->page_size);
+ HDassert((entry_ptr->size == pb_ptr->page_size) || (entry_ptr->is_mpmde));
+ HDassert(entry_ptr->image_ptr);
+ HDassert(entry_ptr->is_dirty);
+ HDassert((pb_ptr->vfd_swmr_writer) || (!(entry_ptr->is_mpmde)));
+ HDassert( ( ! (pb_ptr->vfd_swmr_writer) ) ||
+ ( (pb_ptr->cur_tick) >= (entry_ptr->delay_write_until) ) );
+
+ /* Retrieve the 'eoa' for the file */
+ if ( HADDR_UNDEF == (eoa = H5F_get_eoa(f, entry_ptr->mem_type)) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, \
+ "driver get_eoa request failed")
+
+#if 0 /* JRM */
+ /* TODO: update the free space manager to inform the page buffer when
+ * space is de-allocated so that the following assertions will be
+ * true in all cases.
+ */
+
+ /* Verify that the base address of the page is within the EOA. If it
+ * isn't, the associated page has been discarded and should have been
+ * removed from the page buffer. This is a bug in the HDF5 library, so
+ * an assertion is adequate here.
+ */
+ HDassert( eoa > entry_ptr->addr );
+
+ /* Space at the end of the file should be allocated in increments of
+ * pages. Thus the entire page should be within the EOA. Again,
+ * an assertion is adequate here.
+ */
+ HDassert( eoa >= entry_ptr->addr + entry_ptr->size );
+#else /* JRM */
+ if ( eoa < entry_ptr->addr ) {
+
+ skip_write = TRUE;
- /* Copy raw data from dirty pages into the read buffer if the read
- request spans pages in the page buffer*/
- if(H5FD_MEM_DRAW == type && size >= page_buf->page_size) {
- H5SL_node_t *node;
+ } else if ( eoa < entry_ptr->addr + entry_ptr->size ) {
- /* For each touched page in the page buffer, check if it
- * exists in the page Buffer and is dirty. If it does, we
- * update the buffer with what's in the page so we get the up
- * to date data into the buffer after the big read from the file.
+ /* adjust the size of the write so that the write
+ * will not extend beyond EOA.
*/
- node = H5SL_find(page_buf->slist_ptr, (void *)(&first_page_addr));
- for(i = 0; i < num_touched_pages; i++) {
- search_addr = i*page_buf->page_size + first_page_addr;
+ write_size = (size_t)(eoa - entry_ptr->addr);
- /* if we still haven't located a starting page, search again */
- if(!node && i!=0)
- node = H5SL_find(page_buf->slist_ptr, (void *)(&search_addr));
+ } else {
- /* if the current page is in the Page Buffer, do the updates */
- if(node) {
- page_entry = (H5PB_entry_t *)H5SL_item(node);
+ write_size = entry_ptr->size;
+ }
+
+#endif /* JRM */
- HDassert(page_entry);
- /* If the current page address falls out of the access
- block, then there are no more pages to go over */
- if(page_entry->addr >= addr + size)
- break;
+ /* flush the entry */
+ if ( ! skip_write ) {
+#if VFD_IO /* JRM */
+ file = f->shared->lf;
- HDassert(page_entry->addr == search_addr);
+ if ( H5FD_write(file, entry_ptr->mem_type, entry_ptr->addr,
+ write_size, entry_ptr->image_ptr) < 0 )
+#else /* VFD_IO */ /* JRM */
+ if ( H5F__accum_write(f, entry_ptr->mem_type, entry_ptr->addr,
+ write_size, entry_ptr->image_ptr) < 0 )
+#endif /* VFD_IO */ /* JRM */
- if(page_entry->is_dirty) {
- /* special handling for the first page if it is not a full page access */
- if(i == 0 && first_page_addr != addr) {
- offset = addr - first_page_addr;
- HDassert(page_buf->page_size > offset);
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
+ }
- HDmemcpy(buf, (uint8_t *)page_entry->page_buf_ptr + offset,
- page_buf->page_size - (size_t)offset);
+ /* mark the entry clean */
+ entry_ptr->is_dirty = FALSE;
- /* move to top of LRU list */
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
- } /* end if */
- /* special handling for the last page if it is not a full page access */
- else if(num_touched_pages > 1 && i == num_touched_pages-1 && search_addr < addr+size) {
- offset = (num_touched_pages-2)*page_buf->page_size +
- (page_buf->page_size - (addr - first_page_addr));
- HDmemcpy((uint8_t *)buf + offset, page_entry->page_buf_ptr,
- (size_t)((addr + size) - last_page_addr));
+ /* if the entry is on the LRU, update the replacement policy */
+ if ( ! (entry_ptr->is_mpmde) ) {
- /* move to top of LRU list */
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
- } /* end else-if */
- /* copy the entire fully accessed pages */
- else {
- offset = i*page_buf->page_size;
-
- HDmemcpy((uint8_t *)buf+(i*page_buf->page_size) , page_entry->page_buf_ptr,
- page_buf->page_size);
- } /* end else */
- } /* end if */
- node = H5SL_next(node);
- } /* end if */
- } /* end for */
- } /* end if */
- else {
- /* A raw data access could span 1 or 2 PB entries at this point so
- we need to handle that */
- HDassert(1 == num_touched_pages || 2 == num_touched_pages);
- for(i = 0 ; i < num_touched_pages; i++) {
- haddr_t buf_offset;
-
- /* Calculate the aligned address of the page to search for it in the skip list */
- search_addr = (0==i ? first_page_addr : last_page_addr);
-
- /* Calculate the access size if the access spans more than 1 page */
- if(1 == num_touched_pages)
- access_size = size;
- else
- access_size = (0 == i ? (size_t)((first_page_addr + page_buf->page_size) - addr) : (size - access_size));
-
- /* Lookup the page in the skip list */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
-
- /* if found */
- if(page_entry) {
- offset = (0 == i ? addr - page_entry->addr : 0);
- buf_offset = (0 == i ? 0 : size - access_size);
-
- /* copy the requested data from the page into the input buffer */
- HDmemcpy((uint8_t *)buf + buf_offset, (uint8_t *)page_entry->page_buf_ptr + offset, access_size);
-
- /* Update LRU */
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
-
- /* Update statistics */
- if(type == H5FD_MEM_DRAW)
- page_buf->hits[1]++;
- else
- page_buf->hits[0]++;
- } /* end if */
- /* if not found */
- else {
- void *new_page_buf = NULL;
- size_t page_size = page_buf->page_size;
- haddr_t eoa;
-
- /* make space for new entry */
- if((H5SL_count(page_buf->slist_ptr) * page_buf->page_size) >= page_buf->max_size) {
- htri_t can_make_space;
-
- /* check if we can make space in page buffer */
- if((can_make_space = H5PB__make_space(f, page_buf, type)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "make space in Page buffer Failed")
-
- /* if make_space returns 0, then we can't use the page
- buffer for this I/O and we need to bypass */
- if(0 == can_make_space) {
- /* make space can't return FALSE on second touched page since the first is of the same type */
- HDassert(0 == i);
-
- /* read entire block from VFD and return */
- if(H5FD_read(file, type, addr, size, buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
-
- /* Break out of loop */
- break;
- } /* end if */
- } /* end if */
-
- /* Read page from VFD */
- if(NULL == (new_page_buf = H5FL_FAC_MALLOC(page_buf->page_fac)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
-
- /* Read page through the VFD layer, but make sure we don't read past the EOA. */
-
- /* Retrieve the 'eoa' for the file */
- if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, type)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
-
- /* If the entire page falls outside the EOA, then fail */
- if(search_addr > eoa)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "reading an entire page that is outside the file EOA")
-
- /* Adjust the read size to not go beyond the EOA */
- if(search_addr + page_size > eoa)
- page_size = (size_t)(eoa - search_addr);
-
- /* Read page from VFD */
- if(H5FD_read(file, type, search_addr, page_size, new_page_buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
-
- /* Copy the requested data from the page into the input buffer */
- offset = (0 == i ? addr - search_addr : 0);
- buf_offset = (0 == i ? 0 : size - access_size);
- HDmemcpy((uint8_t *)buf + buf_offset, (uint8_t *)new_page_buf + offset, access_size);
-
- /* Create the new PB entry */
- if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "memory allocation failed")
-
- page_entry->page_buf_ptr = new_page_buf;
- page_entry->addr = search_addr;
- page_entry->type = (H5F_mem_page_t)type;
- page_entry->is_dirty = FALSE;
-
- /* Insert page into PB */
- if(H5PB__insert_entry(page_buf, page_entry) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTSET, FAIL, "error inserting new page in page buffer")
-
- /* Update statistics */
- if(type == H5FD_MEM_DRAW)
- page_buf->misses[1]++;
- else
- page_buf->misses[0]++;
- } /* end else */
- } /* end for */
- } /* end else */
+ H5PB__UPDATE_RP_FOR_FLUSH(pb_ptr, entry_ptr, FAIL)
+ }
+
+ /* update stats for flush */
+ H5PB__UPDATE_STATS_FOR_FLUSH(pb_ptr, entry_ptr)
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5PB_read() */
+
+} /* H5PB__flush_entry() */
/*-------------------------------------------------------------------------
- * Function: H5PB_write
*
- * Purpose: Write data into the Page Buffer. If the page exists in the
- * cache, update it; otherwise read it from disk, update it, and
- * insert into cache.
+ * Function: H5PB__load_page
*
- * Return: Non-negative on success/Negative on failure
+ * Purpose: Load the page with the specified base address and insert
+ * it into the page buffer. If necessary and possible, make
+ * space for the new page first.
*
- * Programmer: Mohamad Chaarawi
+ * Note that the size of the page is always pb_ptr->page_size,
+ * even in the VFD SWMR case, as in this context, multi-page
+ * metadata entries are always written in full, and they
+ * may only enter the page buffer as the result of a write.
+ *
+ * Return: SUCCEED if no errors are encountered, and
+ * FAIL otherwise.
+ *
+ * Programmer: John Mainzer -- 10/18/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
-herr_t
-H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr,
- size_t size, const void *buf)
+static herr_t
+H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type,
+ H5PB_entry_t **entry_ptr_ptr)
{
- H5PB_t *page_buf; /* Page buffering info for this file */
- H5PB_entry_t *page_entry; /* Pointer to the corresponding page entry */
+ hbool_t skip_read = FALSE;
+ haddr_t eoa;
+ haddr_t eof = HADDR_UNDEF;
+ H5PB_entry_t *entry_ptr = NULL;
+ void *image_ptr = NULL;
H5FD_t *file; /* File driver pointer */
- haddr_t first_page_addr, last_page_addr; /* Addresses of the first and last pages covered by I/O */
- haddr_t offset;
- haddr_t search_addr; /* Address of current page */
- hsize_t num_touched_pages; /* Number of pages accessed */
- size_t access_size;
- hbool_t bypass_pb = FALSE; /* Whether to bypass page buffering */
- hsize_t i; /* Local index variable */
- herr_t ret_value = SUCCEED; /* Return value */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
- /* Sanity checks */
+ /* sanity checks */
HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->lf);
- /* Get pointer to page buffer info for this file */
- page_buf = f->shared->page_buf;
+ file = f->shared->lf;
-#ifdef H5_HAVE_PARALLEL
- if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
-#if 1
- bypass_pb = TRUE;
-#else
- /* MSC - why this stopped working ? */
- int mpi_size;
-
- if((mpi_size = H5F_mpi_get_size(f)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "can't retrieve MPI communicator size")
- if(1 != mpi_size)
- bypass_pb = TRUE;
-#endif
- } /* end if */
-#endif
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert((entry_ptr_ptr == NULL) || (*entry_ptr_ptr == NULL));
+
+ /* Retrieve the 'eoa' for the file */
+ if ( HADDR_UNDEF == (eoa = H5F_get_eoa(f, type)))
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, \
+ "driver get_eoa request failed")
+
+ if ( addr + ((haddr_t)(pb_ptr->page_size)) > eoa )
- /* If page buffering is disabled, or the I/O size is larger than that of a
- * single page, or if this is a parallel raw data access, bypass page
- * buffering.
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "Attempt to load page that extends past EOA")
+
+ if ( HADDR_UNDEF == (eof = H5FD_get_eof(f->shared->lf, H5FD_MEM_DEFAULT)) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, \
+ "driver get_eof request failed")
+
+ /* It is possible that this page has been allocated but not
+ * written. Skip the read if addr >= EOF. In this case, tell
+ * H5PB__create_new_page() to zero the page image.
*/
- if(NULL == page_buf || size >= page_buf->page_size || bypass_pb) {
- if(H5F__accum_write(f, type, addr, size, buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "write through metadata accumulator failed")
+ skip_read = (addr >= eof);
- /* Update statistics */
- if(page_buf) {
- if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
- page_buf->bypasses[1]++;
- else
- page_buf->bypasses[0]++;
- } /* end if */
-
- /* If page buffering is disabled, or if this is a large metadata access,
- * or if this is a parallel raw data access, we are done here
- */
- if(NULL == page_buf || (size >= page_buf->page_size && H5FD_MEM_DRAW != type) ||
- (bypass_pb && H5FD_MEM_DRAW == type))
- HGOTO_DONE(SUCCEED)
-#ifdef H5_HAVE_PARALLEL
- if(bypass_pb) {
- if(H5PB_update_entry(page_buf, addr, size, buf) > 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTUPDATE, FAIL, "failed to update PB with metadata cache")
- HGOTO_DONE(SUCCEED)
- } /* end if */
-#endif
- } /* end if */
+ /* make space in the page buffer if necessary */
+ if ( ( pb_ptr->curr_pages >= pb_ptr->max_pages ) &&
+ ( H5PB__make_space(f, pb_ptr, type) < 0 ) )
- /* Update statistics */
- if(page_buf) {
- if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
- page_buf->accesses[1]++;
- else
- page_buf->accesses[0]++;
- } /* end if */
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "H5PB__make_space() reports an error")
- /* Calculate the aligned address of the first page */
- first_page_addr = (addr / page_buf->page_size) * page_buf->page_size;
- /* For raw data calculate the aligned address of the last page and
- * the number of pages accessed if more than 1 page is accessed
+ /* Create a new page buffer page and insert it into the page buffer */
+ if ( H5PB__create_new_page(pb_ptr, addr, (size_t)(pb_ptr->page_size),
+ type, skip_read, &entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "can't create new page buffer page")
+
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->addr == addr);
+
+ image_ptr = entry_ptr->image_ptr;
+
+ HDassert(image_ptr);
+
+ /* Read the contents of the page from file, and store it in the
+ * image buffer associated with the new entry.
*/
- if(H5FD_MEM_DRAW == type) {
- last_page_addr = (addr + size - 1) / page_buf->page_size * page_buf->page_size;
-
- /* how many pages does this write span */
- num_touched_pages = (last_page_addr/page_buf->page_size + 1) -
- (first_page_addr / page_buf->page_size);
- if(first_page_addr == last_page_addr) {
- HDassert(1 == num_touched_pages);
- last_page_addr = HADDR_UNDEF;
- } /* end if */
- } /* end if */
- /* Otherwise set last page addr to HADDR_UNDEF */
- else {
- num_touched_pages = 1;
- last_page_addr = HADDR_UNDEF;
- } /* end else */
+#if VFD_IO /* JRM */
+ if ( ( ! skip_read ) &&
+ ( H5FD_read(file, type, addr, entry_ptr->size, image_ptr) < 0 ) )
+#else /* VFD_IO */ /* JRM */
+ if ( ( ! skip_read ) &&
+ ( H5F__accum_read(f, type, addr, entry_ptr->size, image_ptr) < 0 ) )
+#endif /* VFD_IO */ /* JRM */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "driver read request failed")
+
+ H5PB__UPDATE_STATS_FOR_LOAD(pb_ptr, entry_ptr)
+
+ if ( entry_ptr_ptr ) {
+
+ *entry_ptr_ptr = entry_ptr;
+ }
+
+done:
+
+ /* add cleanup in case of failure */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__load_page() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__make_space
+ *
+ * Purpose: Evict one or more pages from the page buffer so as to
+ * reduce the size of the page buffer to pb_ptr->max_pages - 1,
+ * if possible.
+ *
+ * Note that the function must not be called under
+ * nonsensical conditions -- thus if either
+ *
+ * 1) the inserted type is metadata and min_rd_pages ==
+ * max_pages, or
+ *
+ * 2) the inserted type is raw data and min_md_pages ==
+ * max_pages
+ *
+ * holds, the function has been called in error. The current
+ * code flags an error and returns FAIL in these cases.
+ *
+ * If the page buffer is below its maximum size, we are
+ * done, and the function simply returns.
+ *
+ * Otherwise, scan upwards from the bottom of the LRU list,
+ * examining each entry in turn.
+ *
+ * If the entry is dirty, flush it, move it to the top of the
+ * LRU, and continue with the scan. Note in the VFD SWMR case,
+ * we do not have to concern ourselves with delayed writes in
+ * this context, as all entries which are subject to delayed
+ * writes must reside on the delayed write list, not the LRU list.
+ *
+ * If the entry is:
+ *
+ * 1) clean
+ *
+ * 2) either:
+ *
+ * a) the target entry is metadata and
+ * curr_md_pages > min_md_pages.
+ *
+ * b) the target entry is raw data and
+ * curr_rd_pages > min_rd_pages.
+ *
+ * c) the target entry is metadata, the inserted_type
+ * is metadata, and curr_md_pages == min_md_pages.
+ *
+ * d) the target entry is raw data, the inserted_type
+ * is raw data, and curr_rd_pages == min_rd_pages.
+ *
+ * 3) The entry is not on the tick list (which can only
+ * happen if pb_ptr->vfd_swmr_writer is TRUE).
+ *
+ * evict the entry and test to see if pb_ptr->curr_pages <
+ * pb_ptr->max_pages. If it is, return. Otherwise, continue
+ * the scan until either the above condition is fulfilled,
+ * or the head of the LRU is reached.
+ *
+ * Under normal circumstances, it should always be possible
+ * to reduce the size of the page buffer below pb_ptr->max_pages.
+ * However, due to the prohibition on evicting entries on the
+ * tick list, and either flushing or evicting entries on the
+ * delayed write list, this will not in general be the case
+ * if pb_ptr->vfd_swmr_writer is TRUE. In this case, the
+ * page buffer may exceed its maximum size by an arbitrary
+ * amount.
+ *
+ * If this situation occurs with any regularity, we will
+ * need a mechanism to avoid attempts to make space when
+ * it is not possible to do so.
+ *
+ * Return: SUCCEED if no errors are encountered, and
+ * FAIL otherwise.
+ *
+ * Programmer: John Mainzer -- 10/14/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__make_space(H5F_t *f, H5PB_t *pb_ptr, H5FD_mem_t inserted_type)
+{
+ hbool_t inserting_md;
+ H5PB_entry_t *search_ptr;
+ H5PB_entry_t *flush_ptr;
+ H5PB_entry_t *evict_ptr;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* sanity checks */
+ HDassert(f);
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(pb_ptr->min_md_pages + pb_ptr->min_rd_pages <= pb_ptr->max_pages);
+
+ inserting_md = ( H5FD_MEM_DRAW != inserted_type );
+
+ if ( ( inserting_md ) && ( pb_ptr->min_rd_pages == pb_ptr->max_pages ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL,
+ "can't make space for metadata -- pb config for raw data only")
+
+ if ( ( ! inserting_md ) && ( pb_ptr->min_md_pages == pb_ptr->max_pages ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL,
+ "can't make space for raw data -- pb config for metadata only")
+
+ search_ptr = pb_ptr->LRU_tail_ptr;
+
+ while ( ( search_ptr ) && ( pb_ptr->curr_pages >= pb_ptr->max_pages ) ) {
+
+ HDassert(search_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+
+ if ( search_ptr->modified_this_tick ) { /* entry is on tick list */
+
+ search_ptr = search_ptr->prev;
+ H5PB__UPDATE_STATS_FOR_LRU_TL_SKIP(pb_ptr);
+
+ } else if ( ( inserting_md ) &&
+ ( ! (search_ptr->is_metadata) ) &&
+ ( pb_ptr->curr_rd_pages <= pb_ptr->min_rd_pages ) ) {
+
+ search_ptr = search_ptr->prev;
+ H5PB__UPDATE_STATS_FOR_LRU_RD_SKIP(pb_ptr);
+
+ } else if ( ( ! inserting_md ) &&
+ ( search_ptr->is_metadata ) &&
+ ( pb_ptr->curr_md_pages <= pb_ptr->min_md_pages ) ) {
+
+ search_ptr = search_ptr->prev;
+ H5PB__UPDATE_STATS_FOR_LRU_MD_SKIP(pb_ptr);
+
+ } else if ( search_ptr->is_dirty ) {
+
+ /* One can make the argument that we should test for dirty
+ * entries first, instead of skipping potentially dirty
+ * entries in the above clauses. However, I suspect that
+ * this would result in excessive flushes. Let us try it
+ * this way for now.
+ */
+
+ flush_ptr = search_ptr;
+
+ /* if *search_ptr has a predecessor in the LRU,
+ * set search_ptr equal to search_ptr->prev. Otherwise,
+ * leave search_ptr unchanged, so that it can be examined
+ * on the next pass through the while loop after it has been
+ * flushed.
+ */
+ if ( search_ptr->prev ) {
+
+ search_ptr = search_ptr->prev;
+ }
+
+ if ( H5PB__flush_entry(f, pb_ptr, flush_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "Can't flush entry")
+
+ } else { /* evict the entry */
+
+ evict_ptr = search_ptr;
+ search_ptr = search_ptr->prev;
+ if ( H5PB__evict_entry(pb_ptr, evict_ptr, FALSE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "Can't evict entry")
+ }
+ }
+
+ HDassert( ( search_ptr == NULL ) ||
+ ( pb_ptr->curr_pages < pb_ptr->max_pages ) );
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__make_space() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__mark_entry_clean
+ *
+ * Purpose: Mark the target entry clean.
+ *
+ * This function is typically used when an entry has been
+ * completely overwritten and is about to be evicted. In
+ * this case, the entry must be marked clean to avoid
+ * sanity check failures on evictions.
+ *
+ * The is_dirty flag is cleared without writing the entry
+ * to file. If the entry is not a multi-page metadata
+ * entry, the replacement policy is also updated via
+ * H5PB__UPDATE_RP_FOR_ACCESS.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/14/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__mark_entry_clean(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
+{
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* sanity checks */
+ HDassert(pb_ptr);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->size > 0);
+ HDassert(entry_ptr->size >= pb_ptr->page_size);
+ HDassert((entry_ptr->size == pb_ptr->page_size) || (entry_ptr->is_mpmde));
+ HDassert(entry_ptr->image_ptr);
+ HDassert((pb_ptr->vfd_swmr_writer) || (!(entry_ptr->is_mpmde)));
+
+ /* mark the entry clean */
+ entry_ptr->is_dirty = FALSE;
+
+ /* delete this once we start tracking clean and dirty entries in the
+ * hash table.
+ */
+ if ( ! (entry_ptr->is_mpmde) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+
+done:
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__mark_entry_clean() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function:    H5PB__mark_entry_dirty
+ *
+ * Purpose:     Set the is_dirty flag of the target entry.
+ *
+ *              Under normal circumstances, the entry will be in the
+ *              replacement policy.  In this case, the replacement
+ *              policy is also updated for an access.
+ *
+ *              If pb_ptr->vfd_swmr_writer is set, the target may be a
+ *              multi-page metadata entry.  Such an entry is not in the
+ *              replacement policy, and thus the policy is not updated.
+ *              The same holds for an entry whose delay_write_until
+ *              field is non-zero.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ * Programmer:  John Mainzer -- 10/14/18
+ *
+ * Changes:     None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__mark_entry_dirty(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
+{
+    herr_t ret_value = SUCCEED;         /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* the page buffer must be valid */
+    HDassert(pb_ptr);
+    HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+
+    /* the entry must be valid, and must hold either exactly one page
+     * or a multi-page metadata entry (VFD SWMR writer only)
+     */
+    HDassert(entry_ptr);
+    HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+    HDassert(entry_ptr->size > 0);
+    HDassert(entry_ptr->size >= pb_ptr->page_size);
+    HDassert((entry_ptr->size == pb_ptr->page_size) || (entry_ptr->is_mpmde));
+    HDassert(entry_ptr->image_ptr);
+    HDassert((pb_ptr->vfd_swmr_writer) || (!(entry_ptr->is_mpmde)));
+
+    entry_ptr->is_dirty = TRUE;
+
+    /* entries on the LRU are exactly those that are neither multi-page
+     * metadata entries nor subject to a delayed write -- only those get
+     * a replacement policy update.
+     */
+    if ( ! (entry_ptr->is_mpmde) ) {
+
+        if ( entry_ptr->delay_write_until == 0 ) {
+
+            H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+        }
+    }
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB__mark_entry_dirty() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB__read_meta
+ *
+ * Purpose: Satisfy a metadata read in cases 6, 7, 8, 9, and 10 of
+ * H5PB_read(). Specifically:
+ *
+ * 6) If the read is for metadata and not page aligned, clip
+ * the read to the end of the current page if necessary.
+ * Load the relevant page if necessary and satisfy the
+ * read from the page buffer. Note that if there is an
+ * existing page, it must not be a multi-page metadata
+ * entry. If it is, flag an error.
+ *
+ * 7) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is no entry in the page buffer,
+ * satisfy the read from the file
+ *
+ * 8) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a regular entry at the target
+ * page address, test to see if the last read was for the
+ * same address.
+ *
+ * If it was, evict the page, and satisfy the read from file.
+ * Flag an error if the page was dirty.
+ *
+ * If the last read was for a different page, clip the read
+ * to one page, and satisfy the read from the existing
+ * regular entry.
+ *
+ * 9) If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a multi-page metadata entry
+ * at the target page address, test to see if
+ * pb_ptr->vfd_swmr_writer is TRUE.
+ *
+ * If it is, satisfy the read from the multi-page metadata
+ * entry, clipping the read if necessary.
+ *
+ * If pb_ptr->vfd_swmr_writer is FALSE, flag an error.
+ *
+ * 10) If the read is for metadata, is page aligned, is no
+ * larger than a page, test to see if the page buffer
+ * contains a page at the target address.
+ *
+ * If it doesn't, load the page and satisfy the read
+ * from it.
+ *
+ * If it contains a regular page entry, satisfy the read
+ * from it.
+ *
+ * If it contains a multi-page metadata entry at the target
+ * address, satisfy the read from the multi-page metadata
+ * entry if pb_ptr->vfd_swmr_writer is TRUE, and flag an
+ * error otherwise.
+ *
+ * The above case analysis may be a bit hard to read. If so,
+ * the table shown below may help to clarify. Here:
+ *
+ * P/A == page aligned
+ * size > PL == size > page length
+ * PA == previous address
+ * A == current address
+ *
+ * In the entry exists column:
+ *
+ * N == no entry
+ * R == regular (1 page) entry
+ * MPMDE == multi-page metadata entry
+ *
+ * | size | entry | VFD | |
+ * P/A: | > PL | exists | SWMR | PA == A | Comments:
+ * ------+------+--------+------+---------+-------------------------------------
+ * N | X | N || R | X | X | Clip read to page boundary if
+ * | | | | | necessary
+ * | | | | | Load entry if necessary
+ * | | | | | Satisfy read from entry (case 6)
+ * ------+------+--------+------+---------+-------------------------------------
+ * N | X | MPMDE | X | X | Error (case 6)
+ * ------+------+--------+------+---------+-------------------------------------
+ * | | | | |
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | Y | N | X | X | Satisfy read from file (case 7)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | Y | R | X | N | Clip read to page boundary
+ * | | | | | Satisfy read from entry (case 8)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | Y | R | X | Y | Evict entry
+ * | | | | | (must be clean -- flag error if not)
+ * | | | | | Satisfy read from file (case 8)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | Y | MPMDE | N | X | Error (case 9)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | Y | MPMDE | Y | X | Clip read to MPE size if required.
+ * | | | | | Satisfy read from MPE (case 9)
+ * ------+------+--------+------+---------+-------------------------------------
+ * | | | | |
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | N | N | X | X | Load entry
+ * | | | | | Satisfy read from entry (case 10)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | N | R | X | X | Satisfy read from entry (case 10)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | N | MPMDE | Y | X | Satisfy read from entry (case 10)
+ * ------+------+--------+------+---------+-------------------------------------
+ * Y | N | MPMDE | N | X | Error (case 10)
+ * ------+------+--------+------+---------+-------------------------------------
+ *
+ * Observe that the above cases imply that:
+ *
+ * 1) The page buffer is defined.
+ *
+ * 2) The page buffer has been configured to accept at least
+ * one page of metadata.
+ *
+ * 3) This is a metadata read.
+ *
+ * Note also that if the metadata read is of size
+ * no larger than page size, it may not cross page
+ * boundaries.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5PB__read_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ void *buf/*out*/)
+{
+ H5PB_t *pb_ptr; /* Page buffer for this file */
+ H5PB_entry_t *entry_ptr; /* Pointer to page buffer entry */
+ H5FD_t *file; /* File driver pointer */
+ uint64_t page; /* page offset of addr */
+ haddr_t page_addr; /* address of the page containing addr */
+ static haddr_t prev_addr = HADDR_UNDEF; /* addr of last call -- assigned
+ * just before the done: label, and
+ * compared against addr in case 8.
+ * NOTE(review): being static, this
+ * single value is shared by every
+ * call to this function -- confirm
+ * that this is intended.
+ */
+ size_t offset; /* offset of read in page */
+ size_t clipped_size; /* possibly clipped size */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Sanity checks */
+ HDassert(f);
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
+
+ pb_ptr = f->shared->pb_ptr;
+
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(pb_ptr->min_rd_pages < pb_ptr->max_pages);
+ HDassert(f->shared->lf);
- /* Translate to file driver I/O info object */
file = f->shared->lf;
- /* Check if existing pages for raw data need to be updated since raw data access is not atomic */
- if(H5FD_MEM_DRAW == type && size >= page_buf->page_size) {
- /* For each touched page, check if it exists in the page buffer, and
- * update it with the data in the buffer to keep it up to date
+ HDassert(H5FD_MEM_DRAW != type);
+ HDassert(buf);
+
+ /* Calculate the aligned address of the first page */
+ page = (addr / pb_ptr->page_size);
+ page_addr = page * pb_ptr->page_size;
+
+ if ( page_addr != addr ) { /* case 6 */
+
+ /* If the read is for metadata and not page aligned, clip
+ * the read to the end of the current page if necessary.
+ * Load the relevant page if necessary and satisfy the
+ * read from the page buffer. Note that if there is an
+ * existing page, it must not be a multi-page metadata
+ * entry. If it is, flag an error.
+ *
+ * NOTE(review): the multi-page metadata entry condition is
+ * only checked via HDassert() below -- no HGOTO_ERROR is
+ * issued for it in this branch.
*/
- for(i = 0; i < num_touched_pages; i++) {
- search_addr = i * page_buf->page_size + first_page_addr;
- /* Special handling for the first page if it is not a full page update */
- if(i == 0 && first_page_addr != addr) {
- /* Lookup the page in the skip list */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
- if(page_entry) {
- offset = addr - first_page_addr;
- HDassert(page_buf->page_size > offset);
-
- /* Update page's data */
- HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, buf, page_buf->page_size - (size_t)offset);
-
- /* Mark page dirty and push to top of LRU */
- page_entry->is_dirty = TRUE;
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
- } /* end if */
- } /* end if */
- /* Special handling for the last page if it is not a full page update */
- else if(num_touched_pages > 1 && i == (num_touched_pages - 1) &&
- (search_addr + page_buf->page_size) != (addr + size)) {
- HDassert(search_addr+page_buf->page_size > addr+size);
-
- /* Lookup the page in the skip list */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
- if(page_entry) {
- offset = (num_touched_pages - 2) * page_buf->page_size +
- (page_buf->page_size - (addr - first_page_addr));
-
- /* Update page's data */
- HDmemcpy(page_entry->page_buf_ptr, (const uint8_t *)buf + offset,
- (size_t)((addr + size) - last_page_addr));
+ offset = addr - page_addr;
- /* Mark page dirty and push to top of LRU */
- page_entry->is_dirty = TRUE;
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
- } /* end if */
- } /* end else-if */
- /* Discard all fully written pages from the page buffer */
- else {
- page_entry = (H5PB_entry_t *)H5SL_remove(page_buf->slist_ptr, (void *)(&search_addr));
- if(page_entry) {
- /* Remove from LRU list */
- H5PB__REMOVE_LRU(page_buf, page_entry)
-
- /* Decrement page count of appropriate type */
- if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
- page_buf->raw_count--;
- else
- page_buf->meta_count--;
-
- /* Free page info */
- page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
- page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
- } /* end if */
- } /* end else */
- } /* end for */
- } /* end if */
- else {
- /* An access could span 1 or 2 PBs at this point so we need to handle that */
- HDassert(1 == num_touched_pages || 2 == num_touched_pages);
- for(i = 0; i < num_touched_pages; i++) {
- haddr_t buf_offset;
-
- /* Calculate the aligned address of the page to search for it in the skip list */
- search_addr = (0 == i ? first_page_addr : last_page_addr);
-
- /* Calculate the access size if the access spans more than 1 page */
- if(1 == num_touched_pages)
- access_size = size;
- else
- access_size = (0 == i ? (size_t)(first_page_addr + page_buf->page_size - addr) : (size - access_size));
-
- /* Lookup the page in the skip list */
- page_entry = (H5PB_entry_t *)H5SL_search(page_buf->slist_ptr, (void *)(&search_addr));
-
- /* If found */
- if(page_entry) {
- offset = (0 == i ? addr - page_entry->addr : 0);
- buf_offset = (0 == i ? 0 : size - access_size);
-
- /* Copy the requested data from the input buffer into the page */
- HDmemcpy((uint8_t *)page_entry->page_buf_ptr + offset, (const uint8_t *)buf + buf_offset, access_size);
-
- /* Mark page dirty and push to top of LRU */
- page_entry->is_dirty = TRUE;
- H5PB__MOVE_TO_TOP_LRU(page_buf, page_entry)
-
- /* Update statistics */
- if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
- page_buf->hits[1]++;
- else
- page_buf->hits[0]++;
- } /* end if */
- /* If not found */
- else {
- void *new_page_buf;
- size_t page_size = page_buf->page_size;
-
- /* Make space for new entry */
- if((H5SL_count(page_buf->slist_ptr) * page_buf->page_size) >= page_buf->max_size) {
- htri_t can_make_space;
-
- /* Check if we can make space in page buffer */
- if((can_make_space = H5PB__make_space(f, page_buf, type)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_NOSPACE, FAIL, "make space in Page buffer Failed")
-
- /* If make_space returns 0, then we can't use the page
- * buffer for this I/O and we need to bypass
+ if ( (offset + size) <= pb_ptr->page_size ) {
+
+ clipped_size = size;
+
+ } else {
+
+ clipped_size = size - ( (offset + size) - pb_ptr->page_size);
+ }
+
+ HDassert( clipped_size > 0 );
+ HDassert( clipped_size <= size );
+ HDassert( (offset + clipped_size) <= pb_ptr->page_size );
+
+ /* get the containing page */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, ((entry_ptr) != NULL), \
+ TRUE, FALSE)
+
+ if ( ( NULL == entry_ptr ) &&
+ ( H5PB__load_page(f, pb_ptr, page_addr, type, &entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "page buffer page load request failed (1)")
+
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->addr == page_addr);
+ HDassert(entry_ptr->is_metadata);
+ HDassert(!(entry_ptr->is_mpmde));
+
+ /* copy data from the page into read buffer */
+ HDmemcpy((uint8_t *)buf, (uint8_t *)(entry_ptr->image_ptr) + offset,
+ clipped_size);
+
+ /* if the entry is on the LRU, update the replacement policy */
+ if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+ } else {
+
+ HDassert( page_addr == addr );
+
+ if ( size > pb_ptr->page_size ) {
+
+ /* search the page buffer for an entry at page */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+
+ if ( entry_ptr == NULL ) { /* case 7 */
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, FALSE, TRUE, TRUE)
+
+ /* If the read is for metadata, is page aligned, is larger
+ * than one page, and there is no entry in the page buffer,
+ * satisfy the read from the file
+ */
+#if VFD_IO /* JRM */
+ if ( H5FD_read(file, type, addr, size, buf) < 0)
+#else /* VFD_IO */ /* JRM */
+ if ( H5F__accum_read(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */ /* JRM */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "driver read request failed (1)")
+ } else {
+
+ HDassert( entry_ptr );
+ HDassert( entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC );
+ HDassert( entry_ptr->is_metadata );
+
+ if ( ! ( entry_ptr->is_mpmde ) ) { /* case 8 */
+
+ /* If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a regular entry at the target
+ * page address, test to see if the last read was for the
+ * same address.
+ *
+ * If it was, evict the page, and satisfy the read from file.
+ * Flag an error if the page was dirty.
+ *
+ * If the last read was for a different page, clip the read
+ * to one page, and satisfy the read from the existing
+ * regular entry.
+ *
+ * NOTE(review): the "page was dirty" condition is only
+ * checked via HDassert() below, after which the entry is
+ * evicted with the third argument of H5PB__evict_entry()
+ * set to TRUE.
*/
- if(0 == can_make_space) {
- HDassert(0 == i);
-
- /* Write to VFD and return */
- if(H5FD_write(file, type, addr, size, buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "driver write request failed")
-
- /* Break out of loop */
- break;
- } /* end if */
- } /* end if */
-
- /* Don't bother searching if there is no write access */
- if(H5F_ACC_RDWR & H5F_INTENT(f))
- /* Lookup & remove the page from the new skip list page if
- * it exists to see if this is a new page from the MF layer
+
+ HDassert( entry_ptr->size == pb_ptr->page_size );
+
+ if ( addr == prev_addr ) {
+
+ /* since this is a second try, don't update
+ * hit rate stats.
+ */
+
+ HDassert( ! ( entry_ptr->is_dirty ) );
+
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "forced eviction failed (1)")
+#if VFD_IO /* JRM */
+ if ( H5FD_read(file, type, addr, size, buf) < 0)
+#else /* VFD_IO */ /* JRM */
+ if ( H5F__accum_read(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */ /* JRM */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "driver read request failed (2)")
+ } else {
+
+ HDassert( entry_ptr->image_ptr );
+
+ /* copy data from the page into read buffer */
+ HDmemcpy((uint8_t *)buf,
+ (uint8_t *)(entry_ptr->image_ptr),
+ entry_ptr->size);
+
+ /* if the entry is on the LRU, update the replacement
+ * policy
+ */
+ if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, TRUE, TRUE, TRUE)
+ }
+ } else { /* case 9 */
+
+ /* If the read is for metadata, is page aligned, is larger
+ * than one page, and there is a multi-page metadata entry
+ * at the target page address, test to see if
+ * pb_ptr->vfd_swmr_writer is TRUE.
+ *
+ * If it is, satisfy the read from the multi-page metadata
+ * entry, clipping the read if necessary.
+ *
+ * If pb_ptr->vfd_swmr_writer is FALSE, flag an error.
+ *
+ * NOTE(review): the vfd_swmr_writer condition is only
+ * checked via HDassert() below.
*/
- page_entry = (H5PB_entry_t *)H5SL_remove(page_buf->mf_slist_ptr, (void *)(&search_addr));
-
- /* Calculate offset into the buffer of the page and the user buffer */
- offset = (0 == i ? addr - search_addr : 0);
- buf_offset = (0 == i ? 0 : size - access_size);
-
- /* If found, then just update the buffer pointer to the newly allocate buffer */
- if(page_entry) {
- /* Allocate space for the page buffer */
- if(NULL == (new_page_buf = H5FL_FAC_MALLOC(page_buf->page_fac)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
- HDmemset(new_page_buf, 0, (size_t)offset);
- HDmemset((uint8_t *)new_page_buf + offset + access_size, 0, page_size - ((size_t)offset + access_size));
-
- page_entry->page_buf_ptr = new_page_buf;
-
- /* Update statistics */
- if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
- page_buf->hits[1]++;
- else
- page_buf->hits[0]++;
- } /* end if */
- /* Otherwise read page through the VFD layer, but make sure we don't read past the EOA. */
- else {
- haddr_t eoa, eof = HADDR_UNDEF;
-
- /* Allocate space for the page buffer */
- if(NULL == (new_page_buf = H5FL_FAC_CALLOC(page_buf->page_fac)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed for page buffer entry")
-
- /* Create the new loaded PB entry */
- if(NULL == (page_entry = H5FL_CALLOC(H5PB_entry_t)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTALLOC, FAIL, "memory allocation failed")
-
- page_entry->page_buf_ptr = new_page_buf;
- page_entry->addr = search_addr;
- page_entry->type = (H5F_mem_page_t)type;
-
- /* Retrieve the 'eoa' for the file */
- if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, type)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
-
- /* If the entire page falls outside the EOA, then fail */
- if(search_addr > eoa)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "writing to a page that is outside the file EOA")
-
- /* Retrieve the 'eof' for the file - The MPI-VFD EOF
- * returned will most likely be HADDR_UNDEF, so skip
- * that check.
+ HDassert( entry_ptr->is_mpmde );
+ HDassert( pb_ptr->vfd_swmr_writer );
+
+ if ( size > entry_ptr->size ) {
+
+ clipped_size = entry_ptr->size;
+
+ } else {
+
+ clipped_size = size;
+ }
+
+ /* copy data from the page into read buffer */
+ HDmemcpy((uint8_t *)buf, (uint8_t *)(entry_ptr->image_ptr),
+ clipped_size);
+
+ /* if the entry is on the LRU, update the replacement
+ * policy
*/
- if(!H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
- if(HADDR_UNDEF == (eof = H5FD_get_eof(f->shared->lf, H5FD_MEM_DEFAULT)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eof request failed")
-
- /* Adjust the read size to not go beyond the EOA */
- if(search_addr + page_size > eoa)
- page_size = (size_t)(eoa - search_addr);
-
- if(search_addr < eof) {
- if(H5FD_read(file, type, search_addr, page_size, new_page_buf) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, "driver read request failed")
-
- /* Update statistics */
- if(type == H5FD_MEM_DRAW || type == H5FD_MEM_GHEAP)
- page_buf->misses[1]++;
- else
- page_buf->misses[0]++;
- } /* end if */
- } /* end else */
-
- /* Copy the requested data from the page into the input buffer */
- HDmemcpy((uint8_t *)new_page_buf + offset, (const uint8_t *)buf+buf_offset, access_size);
-
- /* Page is dirty now */
- page_entry->is_dirty = TRUE;
-
- /* Insert page into PB, evicting other pages as necessary */
- if(H5PB__insert_entry(page_buf, page_entry) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTSET, FAIL, "error inserting new page in page buffer")
- } /* end else */
- } /* end for */
- } /* end else */
+ if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, TRUE, TRUE, TRUE)
+ }
+ }
+ } else { /* case 10 */
+
+ /* If the read is for metadata, is page aligned, is no
+ * larger than a page, test to see if the page buffer
+ * contains a page at the target address.
+ *
+ * If it doesn't, load the page and satisfy the read
+ * from it.
+ *
+ * If it contains a regular page entry, satisfy the read
+ * from it.
+ *
+ * If it contains a multi-page metadata entry at the target
+ * address, satisfy the read from the multi-page metadata
+ * entry if pb_ptr->vfd_swmr_writer is TRUE, and flag an
+ * error otherwise.
+ */
+ HDassert( size <= pb_ptr->page_size );
+
+ /* get the containing page */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ TRUE, FALSE)
+
+ if ( ( NULL == entry_ptr ) &&
+ ( H5PB__load_page(f, pb_ptr, page_addr, type, &entry_ptr) < 0))
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "page buffer page load request failed (2)")
+
+ HDassert( entry_ptr );
+ HDassert( entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC );
+ HDassert( entry_ptr->is_metadata );
+ HDassert( ( ! ( entry_ptr->is_mpmde ) ) ||
+ ( pb_ptr->vfd_swmr_writer) );
+
+ /* copy data from the page into read buffer */
+ HDmemcpy((uint8_t *)buf, (uint8_t *)(entry_ptr->image_ptr), size);
+
+ /* if the entry is on the LRU, update the replacement policy */
+ if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ }
+ }
+ }
+
+ prev_addr = addr;
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5PB_write() */
+
+} /* end H5PB__read_meta() */
/*-------------------------------------------------------------------------
- * Function: H5PB__insert_entry()
*
- * Purpose: ???
+ * Function:    H5PB__read_raw
*
- * This function was created without documentation.
- * What follows is my best understanding of Mohamad's intent.
+ * Purpose:     Satisfy a raw data read in cases 3 and 4 from H5PB_read().
+ *              Specifically:
*
- * Insert the supplied page into the page buffer, both the
- * skip list and the LRU.
+ *              3) If the read is for raw data, and it is of page size
+ *                 or greater, read it directly from the HDF5 file.
*
- * As best I can tell, this function imposes no limit on the
- * number of entries in the page buffer beyond an assertion
- * failure it the page count exceeds the limit.
+ *                 It is possible that the page buffer contains dirty pages
+ *                 that intersect with the read -- test for this and update
+ *                 the read buffer from the page buffer if any such pages
+ *                 exist.
*
- * JRM -- 12/22/16
+ *                 Note that no pages are inserted into the page buffer in
+ *                 this case.
*
+ *              4) If the read is for raw data, and it is smaller than
+ *                 the page size, satisfy the read from the page buffer,
+ *                 loading and inserting pages into the page buffer as
+ *                 necessary.
+ *
+ *                 Such a read touches either a single page, or two
+ *                 adjacent pages.  In the latter case, the leading
+ *                 (page_size - offset) bytes of the read come from the
+ *                 tail of the first page, and the remainder comes from
+ *                 the head of the second page.
+ *
+ *              Observe that this implies that:
+ *
+ *              1) The page buffer is defined.
+ *
+ *              2) The page buffer has been configured to accept at least
+ *                 one page of raw data.
+ *
+ *              3) This is a raw data read.
+ *
+ *              On success, size bytes starting at file address addr
+ *              have been copied into *buf.
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer:  John Mainzer -- 10/11/18
+ *
+ * Changes:     None.
*
*-------------------------------------------------------------------------
*/
-static herr_t
-H5PB__insert_entry(H5PB_t *page_buf, H5PB_entry_t *page_entry)
+static herr_t
+H5PB__read_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+    void *buf/*out*/)
{
- herr_t ret_value = SUCCEED; /* Return value */
+    H5PB_t *pb_ptr;             /* Page buffer for this file */
+    H5PB_entry_t *entry_ptr;    /* Pointer to page buffer entry */
+    uint64_t first_page;        /* page offset of first I/O */
+    uint64_t last_page;         /* page offset of last I/O */
+    uint64_t search_page;       /* page offset of current page */
+    haddr_t first_page_addr;    /* address of first page of I/O */
+    haddr_t last_page_addr;     /* address of last page of I/O */
+    haddr_t search_addr;        /* Address of current page */
+    hsize_t num_touched_pages;  /* Number of pages accessed */
+    size_t offset;              /* offset of read in page / read buffer */
+    size_t length;              /* length of read in page */
+    hsize_t i;                  /* Local index variable */
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
- FUNC_ENTER_STATIC
+    /* Sanity checks */
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->pb_ptr);
- /* Insert entry in skip list */
- if(H5SL_insert(page_buf->slist_ptr, page_entry, &(page_entry->addr)) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTINSERT, FAIL, "can't insert entry in skip list")
- HDassert(H5SL_count(page_buf->slist_ptr) * page_buf->page_size <= page_buf->max_size);
+    pb_ptr = f->shared->pb_ptr;
- /* Increment appropriate page count */
- if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
- page_buf->raw_count++;
- else
- page_buf->meta_count++;
+    HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+    HDassert(pb_ptr->min_md_pages < pb_ptr->max_pages);
+    HDassert(H5FD_MEM_DRAW == type);
- /* Insert entry in LRU */
- H5PB__INSERT_LRU(page_buf, page_entry)
+
+    /* Calculate the aligned address of the first page */
+    first_page = (addr / pb_ptr->page_size);
+    first_page_addr = first_page * pb_ptr->page_size;
+
+    /* Calculate the aligned address of the last page */
+    last_page = ((addr + size - 1) / pb_ptr->page_size);
+    last_page_addr = last_page * pb_ptr->page_size;
+
+    /* Calculate number of pages that this read spans. */
+    num_touched_pages = last_page - first_page + 1;
+
+    if ( first_page_addr == last_page_addr ) {
+
+        /* single page read -- last_page_addr is not used in this case */
+        HDassert(1 == num_touched_pages);
+        last_page_addr = HADDR_UNDEF;
+
+    }
+
+    /* case 3) raw data read of page size or greater. */
+    if ( size >= pb_ptr->page_size ) {
+
+#if VFD_IO
+        if ( H5FD_read(f->shared->lf, type, addr, size, buf) < 0)
+#else /* VFD_IO */
+        if ( H5F__accum_read(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+                        "read through metadata accumulator failed")
+
+
+        /* For each page that intersects with the above read, check to see
+         * if it exists in the page buffer, and if so, if it is dirty.
+         *
+         * If it does and is, update the read buffer with the contents
+         * of the page so we get the up to date data into the buffer
+         * after the big read from the file.
+         */
+        search_page = first_page;
+        search_addr = first_page_addr;
+
+        for(i = 0; i < num_touched_pages; i++) {
+
+            H5PB__SEARCH_INDEX(pb_ptr, search_page, entry_ptr, FAIL)
+
+            /* update hit rate stats */
+            H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+                                           FALSE, FALSE)
+
+            if ( entry_ptr ) {
+
+                HDassert( entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC );
+                HDassert( ! ( entry_ptr->is_metadata ) );
+                HDassert( entry_ptr->page == search_page );
+                HDassert( entry_ptr->addr == search_addr );
+                HDassert( entry_ptr->size == pb_ptr->page_size );
+                HDassert( entry_ptr->delay_write_until == 0 );
+
+                /* The page and the read [addr, addr + size) must
+                 * intersect.  Note that the first and last pages may
+                 * extend beyond the read on either side, so the page
+                 * need not be contained in the read.
+                 */
+                HDassert( entry_ptr->addr < addr + size );
+                HDassert( addr < entry_ptr->addr + entry_ptr->size );
+
+                if ( entry_ptr->is_dirty ) {
+
+                    if ( i == 0 ) {
+
+                        /* handle the possible partial access of the
+                         * first page.
+                         */
+
+                        HDassert( search_addr == first_page_addr );
+                        HDassert( search_page == first_page );
+
+                        offset = addr - first_page_addr;
+
+                        HDassert((( offset == 0 ) && (search_addr == addr )) ||
+                                 (( offset > 0 ) && ( search_addr < addr )));
+
+                        HDassert(pb_ptr->page_size >= offset);
+
+                        HDassert( size >= pb_ptr->page_size - (size_t)offset );
+
+                        HDmemcpy(buf, (uint8_t *)entry_ptr->image_ptr + offset,
+                                 pb_ptr->page_size - (size_t)offset);
+
+                    } else if ( i == num_touched_pages - 1 ) {
+
+                        /* handle the possible partial access of the
+                         * last page.
+                         */
+                        HDassert( i > 0 );
+                        HDassert( search_addr == last_page_addr );
+                        HDassert( search_page == last_page );
+                        HDassert( addr < last_page_addr );
+                        HDassert( last_page_addr < addr + size );
+
+                        /* offset of the last page in the read buffer */
+                        offset = (num_touched_pages - 2) * pb_ptr->page_size +
+                                 (pb_ptr->page_size - (addr - first_page_addr));
+
+                        HDmemcpy((uint8_t *)buf + offset, entry_ptr->image_ptr,
+                                 (size_t)((addr + size) - last_page_addr));
+
+                    } else {
+
+                        /* this is an internal page -- copy it in its
+                         * entirety.
+                         */
+
+                        offset = (i - 1) * pb_ptr->page_size +
+                                 (pb_ptr->page_size - (addr - first_page_addr));
+
+                        HDassert ( addr + offset == search_addr );
+                        HDassert ( offset + pb_ptr->page_size <= size );
+
+                        HDmemcpy(((uint8_t *)(buf) + offset),
+                                 entry_ptr->image_ptr,
+                                 pb_ptr->page_size);
+                    }
+
+                    /* we have touched the entry -- move it to the top
+                     * of the LRU if it resides there.
+                     *
+                     * The entry will be on the LRU if both it is not
+                     * a multi-page metadata entry and it is not
+                     * subject to a delayed write.
+                     *
+                     * As this is a raw data page buffer entry, both of
+                     * these must be true, and are asserted above.
+                     *
+                     * Thus, just update the LRU.
+                     */
+                    H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+
+                } /* if ( entry_ptr->is_dirty ) */
+            } /* if ( entry_ptr ) */
+
+            search_page++;
+            search_addr += pb_ptr->page_size;
+
+        } /* end for */
+    } else {
+        /* case 4: Raw data read of size less than page size.
+         *
+         * In this case, read the desired data from the page buffer, loading
+         * pages if necessary.
+         */
+        HDassert(size < pb_ptr->page_size);
+
+        /* first page -- offset is the offset of the read in the page,
+         * and length is the number of bytes to copy from it.
+         */
+        offset = (size_t)(addr - first_page_addr);
+
+        if ( (offset + size) <= pb_ptr->page_size ) {
+
+            /* the read ends on or before the end of the first page --
+             * the whole read is satisfied from this page.
+             */
+            HDassert(num_touched_pages == 1);
+            length = size;
+
+        } else {
+
+            /* the read runs into the second page -- only the tail of
+             * the first page is read.
+             */
+            HDassert(num_touched_pages == 2);
+            length = pb_ptr->page_size - offset;
+        }
+
+        HDassert(length > 0);
+        HDassert(length <= size);
+        HDassert(offset + length <= pb_ptr->page_size);
+
+        /* get the first page */
+        H5PB__SEARCH_INDEX(pb_ptr, first_page, entry_ptr, FAIL)
+
+        /* update hit rate stats */
+        H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+                                       FALSE, FALSE)
+
+        if ( ( NULL == entry_ptr ) &&
+             ( H5PB__load_page(f, pb_ptr, first_page_addr,
+                               type, &entry_ptr) < 0 ) )
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+                        "page buffer page load request failed (1)")
+
+        HDassert(entry_ptr);
+        HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+        HDassert(entry_ptr->addr == first_page_addr);
+
+
+        /* copy data from first page into read buffer */
+        HDmemcpy((uint8_t *)buf, ((uint8_t *)(entry_ptr->image_ptr) + offset),
+                 length);
+
+        H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+
+        /* second page, if it exists */
+        if ( num_touched_pages == 2 ) {
+
+            /* from here on, offset is the offset into the read buffer
+             * at which the data from the second page starts, and length
+             * is the number of bytes to copy from the head of the second
+             * page.
+             */
+            offset = length;
+            length = size - offset;
+
+            HDassert(offset + length == size);
+            HDassert(length < pb_ptr->page_size);
+
+            /* get the second page */
+            H5PB__SEARCH_INDEX(pb_ptr, last_page, entry_ptr, FAIL)
+
+            /* update hit rate stats */
+            H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+                                           FALSE, FALSE)
+
+            if ( ( NULL == entry_ptr ) &&
+                 ( H5PB__load_page(f, pb_ptr, last_page_addr,
+                                   type, &entry_ptr) < 0 ) )
+
+                HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+                            "page buffer page load request failed (2)")
+
+            HDassert(entry_ptr);
+            HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+            HDassert(entry_ptr->addr == last_page_addr);
+            HDassert(entry_ptr->page == last_page);
+
+            /* copy data from second page into read buffer */
+            HDmemcpy(((uint8_t *)(buf) + offset),
+                     (uint8_t *)(entry_ptr->image_ptr), length);
+
+            H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+        }
+    } /* end else */
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5PB__insert_entry() */
+
+} /* end H5PB__read_raw() */
/*-------------------------------------------------------------------------
- * Function: H5PB__make_space()
*
- * Purpose: ???
+ * Function: H5PB__write_meta
+ *
+ * Purpose: Satisfy a metadata write in cases 7 and 8 from H5PB_write().
+ * Specifically:
+ *
+ * 7) If the write is of metadata, the write is larger than
+ * one page, and vfd_swmr_writer is TRUE, the write must
+ * be buffered in the page buffer until the end of the tick.
+ *
+ * Create a multi-page metadata entry in the page buffer
+ * and copy the write into it. Insert the new entry in
+ * the tick list.
+ *
+ * Test to see if the write of the multi-page metadata
+ * entry must be delayed. If so, place the entry in
+ * the delayed write list. Otherwise, write the multi-page
+ * metadata entry to the HDF5 file.
+ *
+ * 8) If the write is of metadata, and the write is of size
+ * less than or equal to the page size, write the data
+ * into the page buffer, loading and inserting a page
+ * if necessary.
+ *
+ * If, in addition, vfd_swmr_writer is TRUE, we must:
*
- * This function was created without documentation.
- * What follows is my best understanding of Mohamad's intent.
+ * * add the page touched by the write to the tick list
+ * so that it will be buffered until the end of the
+ * tick.
*
- * If necessary and if possible, evict a page from the page
- * buffer to make space for the supplied page. Depending on
- * the page buffer configuration and contents, and the page
- * supplied this may or may not be possible.
+ * * test to see if the write must be delayed, and
+ * add the page to the delayed write list if so.
*
- * JRM -- 12/22/16
+ * Observe that this implies that:
+ *
+ * 1) The page buffer is defined.
+ *
+ * 2) The page buffer has been configured to accept at least
+ * one page of metadata.
+ *
+ * 3) This is a metadata write.
+ *
+ * Note also that if the metadata write is of size
+ * no larger than page size, it may not cross page
+ * boundaries.
+ *
+ * Further, for writes larger than page size (case 7 only),
+ * the base address must be page aligned.
*
+ * Params: f -- file; its page buffer is f->shared->pb_ptr
+ * type -- memory type of the write (asserted not to be
+ * H5FD_MEM_DRAW)
+ * addr -- base address of the write
+ * size -- number of bytes to write
+ * buf -- source buffer; its contents are copied into the
+ * image of the target page buffer entry
+ *
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
-static htri_t
-H5PB__make_space(H5F_t *f, H5PB_t *page_buf, H5FD_mem_t inserted_type)
+static herr_t
+H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ const void *buf/*in*/)
{
- H5PB_entry_t *page_entry; /* Pointer to page eviction candidate */
- htri_t ret_value = TRUE; /* Return value */
+ H5PB_t *pb_ptr; /* Page buffer for this file */
+ H5PB_entry_t *entry_ptr; /* Pointer to page buffer entry */
+ uint64_t page; /* page offset of addr */
+ haddr_t page_addr; /* address of the page containing addr */
+ size_t offset; /* offset of write in page */
+ herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_STATIC
+ FUNC_ENTER_NOAPI(FAIL)
- /* Sanity check */
+ /* Sanity checks */
HDassert(f);
- HDassert(page_buf);
-
- /* Get oldest entry */
- page_entry = page_buf->LRU_tail_ptr;
-
- if(H5FD_MEM_DRAW == inserted_type) {
- /* If threshould is 100% metadata and page buffer is full of
- metadata, then we can't make space for raw data */
- if(0 == page_buf->raw_count && page_buf->min_meta_count == page_buf->meta_count) {
- HDassert(page_buf->meta_count * page_buf->page_size == page_buf->max_size);
- HGOTO_DONE(FALSE)
- } /* end if */
-
- /* check the metadata threshold before evicting metadata items */
- while(1) {
- if(page_entry->prev && H5F_MEM_PAGE_META == page_entry->type &&
- page_buf->min_meta_count >= page_buf->meta_count)
- page_entry = page_entry->prev;
- else
- break;
- } /* end while */
- } /* end if */
- else {
- /* If threshould is 100% raw data and page buffer is full of
- raw data, then we can't make space for meta data */
- if(0 == page_buf->meta_count && page_buf->min_raw_count == page_buf->raw_count) {
- HDassert(page_buf->raw_count * page_buf->page_size == page_buf->max_size);
- HGOTO_DONE(FALSE)
- } /* end if */
-
- /* check the raw data threshold before evicting raw data items */
- while(1) {
- if(page_entry->prev && (H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type) &&
- page_buf->min_raw_count >= page_buf->raw_count)
- page_entry = page_entry->prev;
- else
- break;
- } /* end while */
- } /* end else */
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
- /* Remove from page index */
- if(NULL == H5SL_remove(page_buf->slist_ptr, &(page_entry->addr)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_BADVALUE, FAIL, "Tail Page Entry is not in skip list")
+ pb_ptr = f->shared->pb_ptr;
- /* Remove entry from LRU list */
- H5PB__REMOVE_LRU(page_buf, page_entry)
- HDassert(H5SL_count(page_buf->slist_ptr) == page_buf->LRU_list_len);
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(pb_ptr->min_rd_pages < pb_ptr->max_pages);
+ HDassert(H5FD_MEM_DRAW != type);
+ HDassert(buf);
- /* Decrement appropriate page type counter */
- if(H5F_MEM_PAGE_DRAW == page_entry->type || H5F_MEM_PAGE_GHEAP == page_entry->type)
- page_buf->raw_count--;
- else
- page_buf->meta_count--;
+ /* Calculate the aligned address of the first page */
+ page = (addr / pb_ptr->page_size);
+ page_addr = page * pb_ptr->page_size;
+
+ /* if size > pb_ptr->page_size, addr must be page aligned */
+ HDassert((size <= pb_ptr->page_size) || (addr == page_addr));
+
+
+ /* case 7) metadata write of size greater than page size. */
+ if ( size > pb_ptr->page_size ) {
+
+ /* The write must be for a multi-page metadata entry, and
+ * we must be running as a VFD SWMR writer.
+ *
+ * This requires the following actions:
+ *
+ * 1) If the multi-page metadata entry is not already in the
+ * page buffer, create an entry for it.
+ *
+ * 2) Overwrite the image of the entry with the write buffer.
+ *
+ * 3) If the entry is not already on the tick list, add it to
+ * the tick list.
+ *
+ * 4) If the entry is not already on the delayed write list,
+ * test to see if it should be, and move it from the
+ * LRU to the delayed write list and set the delay_write_until
+ * field appropriately.
+ */
+ HDassert(pb_ptr->vfd_swmr_writer);
+ HDassert(addr == page_addr);
- /* Flush page if dirty */
- if(page_entry->is_dirty)
- if(H5PB__write_entry(f, page_entry) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ TRUE, TRUE)
+
+ if ( NULL == entry_ptr ) {
+
+ /* the multi-page metadata entry is not currently in the page
+ * buffer. Create an entry for it, and insert it into the LRU.
+ *
+ * Don't bother to try to make space for it, as VFD SWMR
+ * ignores the limits on page buffer size.
+ */
+ if ( H5PB__create_new_page(pb_ptr, addr, size, type,
+ FALSE, &entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "can't create new page buffer page")
+ }
+
+ /* at this point, one way or the other, the multi-page metadata
+ * entry must be in the page buffer.
+ */
+ HDassert(entry_ptr->is_metadata);
+ HDassert(entry_ptr->is_mpmde);
+ HDassert(size == entry_ptr->size);
+ HDassert(type == entry_ptr->mem_type);
+
+ /* overwrite the entry image with the write buffer */
+ HDmemcpy((uint8_t *)(entry_ptr->image_ptr), buf, size);
+
+ /* mark the entry dirty */
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (1)")
+
+
+ /* insert in tick list if not there already */
+ if ( ! ( entry_ptr->modified_this_tick ) ) {
+
+ H5PB__INSERT_IN_TL(pb_ptr, entry_ptr, FAIL)
+ }
+
+ /* Test to see if we must delay the write of the multi-page
+ * metadata entry, and move it from the LRU to the delayed write
+ * list if so.
+ */
+
+ /* TODO: write the function for this test -- until it exists, the
+ * assertion below fails whenever this point is reached.
+ */
+ HDassert(FALSE);
+
+ } else {
+ /* case 8) metadata write of size no larger than page size */
+
+ offset = addr - page_addr;
+
+ /* write cannot cross page boundaries. */
+ HDassert((offset + size) <= pb_ptr->page_size);
+
+ /* get the containing page */
+ H5PB__SEARCH_INDEX(pb_ptr, page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ TRUE, FALSE)
+
+#if 1 /* JRM */
+ /* Since the space allocation code doesn't always tell the page
+ * buffer when a page is freed, it is possible that the page
+ * found by the index search is an orphaned raw data page.
+ *
+ * Until this is fixed, test to see if entry_ptr points to
+ * a raw data page, and force its eviction if it does.
+ *
+ * Remove this code as soon as the space allocation code is
+ * updated to tell the page buffer to discard pages when
+ * they are freed.
+ */
+ if ( ( entry_ptr ) && ( ! ( entry_ptr->is_metadata ) ) ) {
+
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "forced eviction failed")
- /* Update statistics */
- if(page_entry->type == H5F_MEM_PAGE_DRAW || H5F_MEM_PAGE_GHEAP == page_entry->type)
- page_buf->evictions[1]++;
- else
- page_buf->evictions[0]++;
+ entry_ptr = NULL;
+ }
+#endif /* JRM */
- /* Release page */
- page_entry->page_buf_ptr = H5FL_FAC_FREE(page_buf->page_fac, page_entry->page_buf_ptr);
- page_entry = H5FL_FREE(H5PB_entry_t, page_entry);
+ /* the page is loaded only if the index search found nothing, or
+ * if the entry found was evicted above.
+ */
+ if ( ( NULL == entry_ptr ) &&
+ ( H5PB__load_page(f, pb_ptr, page_addr, type, &entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "page buffer page load request failed (1)")
+
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->addr == page_addr);
+ HDassert(entry_ptr->is_metadata);
+ HDassert(!(entry_ptr->is_mpmde));
+ HDassert(entry_ptr->size == pb_ptr->page_size);
+ HDassert(size <= entry_ptr->size);
+
+ /* copy data from the write buffer into the page image */
+ HDmemcpy(((uint8_t *)(entry_ptr->image_ptr) + offset),
+ (const uint8_t *)buf, size);
+
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (2)")
+
+ if ( pb_ptr->vfd_swmr_writer ) {
+
+ /* test to see if the entry is on the tick list, and insert
+ * it if it is not. This will force the page buffer to retain
+ * the page until the end of the tick.
+ */
+ if ( ! ( entry_ptr->modified_this_tick ) ) {
+
+ H5PB__INSERT_IN_TL(pb_ptr, entry_ptr, FAIL)
+ }
+
+ /* Test to see if we must delay the write of the metadata
+ * page, and move it from the LRU to the delayed write
+ * list if so.
+ */
+
+ /* TODO: write the function for this test -- until it exists, the
+ * assertion below fails whenever this point is reached.
+ */
+ HDassert(FALSE);
+ }
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5PB__make_space() */
+
+} /* end H5PB__write_meta() */
/*-------------------------------------------------------------------------
- * Function: H5PB__write_entry()
*
- * Purpose: ???
+ * Function: H5PB__write_raw
+ *
+ * Purpose: Satisfy a raw data write in cases 3 and 4 from H5PB_write().
+ * Specifically:
+ *
+ * 3) If the write is raw data, and it is of page size or
+ * larger, write directly to the HDF5 file.
+ *
+ * It is possible that the write intersects one or more
+ * pages in the page buffer -- test for this and update
+ * any partially written pages, and evict any pages
+ * that are completely overwritten.
+ *
+ * Note that no pages are inserted into the page buffer in
+ * this case.
*
- * This function was created without documentation.
- * What follows is my best understanding of Mohamad's intent.
+ * 4) If the write is of raw data, and it is of size less
+ * than the page size, write the page into the page
+ * buffer, loading and inserting pages into the
+ * page buffer as necessary
*
+ * Observe that this implies that:
+ *
+ * 1) The page buffer is defined.
+ *
+ * 2) The page buffer has been configured to accept at least
+ * one page of raw data.
+ *
+ * 3) This is a raw data write.
+ *
+ * Params: f -- file; its page buffer is f->shared->pb_ptr
+ * type -- memory type of the write (asserted to be
+ * H5FD_MEM_DRAW)
+ * addr -- base address of the write
+ * size -- number of bytes to write
+ * buf -- source buffer holding the data to be written
*
* Return: Non-negative on success/Negative on failure
*
- * Programmer: Mohamad Chaarawi
+ * Programmer: John Mainzer -- 10/11/18
+ *
+ * Changes: None.
*
*-------------------------------------------------------------------------
*/
static herr_t
-H5PB__write_entry(H5F_t *f, H5PB_entry_t *page_entry)
+H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ const void *buf/*in*/)
{
- haddr_t eoa; /* Current EOA for the file */
- herr_t ret_value = SUCCEED; /* Return value */
+ H5PB_t *pb_ptr; /* Page buffer for this file */
+ H5PB_entry_t *entry_ptr; /* Pointer to page buffer entry */
+ uint64_t first_page; /* page offset of first I/O */
+ uint64_t last_page; /* page offset of last I/O */
+ uint64_t search_page; /* page offset of current page */
+ haddr_t first_page_addr; /* address of first page of I/O */
+ haddr_t last_page_addr; /* address of last page of I/O */
+ haddr_t search_addr; /* Address of current page */
+ hsize_t num_touched_pages; /* Number of pages accessed */
+ hsize_t i; /* Local index variable */
+ size_t length; /* length of write in a page */
+ size_t offset; /* offset of write in a page */
+ herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_STATIC
+ FUNC_ENTER_NOAPI(FAIL)
- /* Sanity check */
+ /* Sanity checks */
HDassert(f);
- HDassert(page_entry);
+ HDassert(f->shared);
+ HDassert(f->shared->pb_ptr);
- /* Retrieve the 'eoa' for the file */
- if(HADDR_UNDEF == (eoa = H5F_get_eoa(f, (H5FD_mem_t)page_entry->type)))
- HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "driver get_eoa request failed")
+ pb_ptr = f->shared->pb_ptr;
- /* If the starting address of the page is larger than
- * the EOA, then the entire page is discarded without writing.
- */
- if(page_entry->addr <= eoa) {
- H5FD_t *file; /* File driver I/O info */
- size_t page_size = f->shared->page_buf->page_size;
+ HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+ HDassert(pb_ptr->min_md_pages < pb_ptr->max_pages);
+ HDassert(f->shared->lf);
- /* Adjust the page length if it exceeds the EOA */
- if((page_entry->addr + page_size) > eoa)
- page_size = (size_t)(eoa - page_entry->addr);
+ HDassert(H5FD_MEM_DRAW == type);
- /* Translate to file driver I/O info object */
- file = f->shared->lf;
+ /* Calculate the aligned address of the first page */
+ first_page = (addr / pb_ptr->page_size);
+ first_page_addr = first_page * pb_ptr->page_size;
- if(H5FD_write(file, (H5FD_mem_t)page_entry->type, page_entry->addr, page_size, page_entry->page_buf_ptr) < 0)
- HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, "file write failed")
- } /* end if */
+ /* Calculate the aligned address of the last page */
+ last_page = ((addr + size - 1) / pb_ptr->page_size);
+ last_page_addr = last_page * pb_ptr->page_size;
+
+ /* Calculate number of pages that this write spans. */
+ num_touched_pages = last_page - first_page + 1;
+
+ /* a write confined to a single page has no distinct last page */
+ if ( first_page_addr == last_page_addr ) {
+
+ HDassert(1 == num_touched_pages);
+ last_page_addr = HADDR_UNDEF;
+
+ }
+
+ /* case 3) raw data write of page size or greater. */
+ if ( size >= pb_ptr->page_size ) {
+#if VFD_IO
+ if ( H5FD_write(f->shared->lf, type, addr, size, buf) < 0 )
+#else /* VFD_IO */
+ if ( H5F__accum_write(f, type, addr, size, buf) < 0 )
+#endif /* VFD_IO */
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+ "write through metadata accumulator failed")
+
+ /* For each page that intersects with the above write, check to see
+ * if it exists in the page buffer.
+ *
+ * If it does, and if the write overwrites the page fully,
+ * mark the page clean and evict it.
+ *
+ * If the write only partially intersects a page, update the
+ * page and mark it dirty.
+ */
+ search_page = first_page;
+ search_addr = first_page_addr;
+
+ for(i = 0; i < num_touched_pages; i++) {
+
+ H5PB__SEARCH_INDEX(pb_ptr, search_page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ FALSE, FALSE)
+
+#if 1 /* JRM */
+ /* Since the space allocation code doesn't always tell the page
+ * buffer when a page is freed, it is possible that the page
+ * found by the index search is an orphaned metadata page.
+ *
+ * Until this is fixed, test to see if entry_ptr points to
+ * a metadata page, and force its eviction if it does.
+ *
+ * Remove this code as soon as the space allocation code is
+ * updated to tell the page buffer to discard pages when
+ * they are freed.
+ */
+ if ( ( entry_ptr ) && ( entry_ptr->is_metadata ) ) {
+
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "forced eviction failed")
+
+ entry_ptr = NULL;
+ }
+#endif /* JRM */
+
+ if ( entry_ptr ) {
+
+ HDassert( entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC );
+ HDassert( ! ( entry_ptr->is_metadata ) );
+ HDassert( entry_ptr->page == search_page );
+ HDassert( entry_ptr->addr == search_addr );
+ HDassert( entry_ptr->size == pb_ptr->page_size );
+ HDassert( entry_ptr->delay_write_until == 0 );
+ HDassert( entry_ptr->addr <= addr + size );
+
+ if ( ( addr <= entry_ptr->addr ) &&
+ ( entry_ptr->addr + entry_ptr->size <= addr + size ) ) {
+
+ /* the page is completely overwritten -- mark it clean
+ * and evict it.
+ */
+ if ( ( entry_ptr->is_dirty ) &&
+ ( H5PB__mark_entry_clean(pb_ptr, entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry clean failed")
+
+ if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "forced eviction failed (1)")
+
+ } else if ( i == 0 ) {
+
+ /* handle partial overwrite of the first page. */
+
+ HDassert( search_addr == first_page_addr );
+ HDassert( search_page == first_page );
+ HDassert( search_addr < addr );
+ HDassert( entry_ptr->addr + entry_ptr->size <=
+ addr + size );
+
+ offset = addr - first_page_addr;
+
+ HDassert( offset > 0 );
+ HDassert( pb_ptr->page_size >= offset );
+ HDassert( size >= pb_ptr->page_size - (size_t)offset );
+
+ /* the start of buf lands offset bytes into the page and
+ * runs to the end of the page.
+ */
+ HDmemcpy((uint8_t *)entry_ptr->image_ptr + offset, buf,
+ pb_ptr->page_size - (size_t)offset);
+
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (1)")
+
+ } else if ( i == num_touched_pages - 1 ) {
+
+ /* handle partial overwrite of the last page. */
+ HDassert( i > 0 );
+ HDassert( search_addr == last_page_addr );
+ HDassert( search_page == last_page );
+ HDassert( addr < last_page_addr );
+ HDassert( last_page_addr < addr + size );
+
+ /* offset in buf of the first byte destined for the last
+ * page -- equivalent to (last_page_addr - addr).
+ */
+ offset = (num_touched_pages - 2) * pb_ptr->page_size +
+ (pb_ptr->page_size - (addr - first_page_addr));
+
+ HDmemcpy(entry_ptr->image_ptr,
+ (const uint8_t *)buf + offset,
+ (size_t)((addr + size) - last_page_addr));
+
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (2)")
+ } else {
- page_entry->is_dirty = FALSE;
+ /* this should be un-reachable */
+ HDassert(FALSE);
+
+ }
+ } /* if ( entry_ptr ) */
+
+ search_page++;
+ search_addr += pb_ptr->page_size;
+
+ } /* end for */
+ } else {
+ /* case 4: Raw data write of size less than page size.
+ *
+ * In this case, write the data to the page buffer, loading
+ * pages if necessary.
+ */
+ HDassert(size < pb_ptr->page_size);
+
+ /* first page */
+ offset = addr - first_page_addr;
+
+ if ( (offset + size) <= pb_ptr->page_size ) {
+
+ HDassert(num_touched_pages == 1);
+ length = size;
+
+ } else {
+
+ HDassert(num_touched_pages == 2);
+ length = pb_ptr->page_size - offset;
+ HDassert( offset + length == pb_ptr->page_size );
+ }
+
+ /* get the first page */
+ H5PB__SEARCH_INDEX(pb_ptr, first_page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ FALSE, FALSE)
+
+ if ( ( NULL == entry_ptr ) &&
+ ( H5PB__load_page(f, pb_ptr, first_page_addr,
+ type, &entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "page buffer page load request failed (1)")
+
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->addr == first_page_addr);
+
+
+ /* copy data from the write buffer into the first page */
+ HDmemcpy(((uint8_t *)(entry_ptr->image_ptr)) + offset,
+ (const uint8_t *)buf, length);
+
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (3)")
+
+ /* second page, if it exists */
+ if ( num_touched_pages == 2 ) {
+
+ /* from here on, offset is the position in buf of the first
+ * byte destined for the second page, and length is the
+ * number of bytes remaining.
+ */
+ offset = length;
+ length = size - offset;
+
+ HDassert(offset + length == size);
+
+ /* get the second page */
+ H5PB__SEARCH_INDEX(pb_ptr, last_page, entry_ptr, FAIL)
+
+ /* update hit rate stats */
+ H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, (entry_ptr != NULL), \
+ FALSE, FALSE)
+
+ if ( ( NULL == entry_ptr ) &&
+ ( H5PB__load_page(f, pb_ptr, last_page_addr,
+ type, &entry_ptr) < 0 ) )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
+ "page buffer page load request failed (2)")
+
+ HDassert(entry_ptr);
+ HDassert(entry_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+ HDassert(entry_ptr->addr == last_page_addr);
+ HDassert(entry_ptr->page == last_page);
+
+ /* copy data from the write buffer into the second page */
+ HDmemcpy((uint8_t *)(entry_ptr->image_ptr),
+ ((const uint8_t *)(buf) + offset), length);
+
+ if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "mark entry dirty failed (4)")
+ }
+ }
done:
+
FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5PB__write_entry() */
+
+} /* end H5PB__write_raw() */
diff --git a/src/H5PBpkg.h b/src/H5PBpkg.h
index 6b9168b..e71396a 100644
--- a/src/H5PBpkg.h
+++ b/src/H5PBpkg.h
@@ -21,38 +21,1504 @@
/* Get package's private header */
#include "H5PBprivate.h"
-/* Other private headers needed by this file */
+/*
+ * File: H5PBpkg.h
+ *
+ * Purpose: This file contains declarations which are normally visible
+ * only within the H5PB package.
+ *
+ * Source files outside the H5PB package should include
+ * H5PBprivate.h instead.
+ *
+ * Programmer: John Mainzer -- 10/07/18
+ */
/**************************/
/* Package Private Macros */
/**************************/
+/* page buffer configuration settings */
+
+/* value that the magic field of an H5PB_entry_t is compared against in
+ * assertions
+ */
+#define H5PB__H5PB_ENTRY_T_MAGIC 0x02030405
+/* when TRUE, the doubly linked list sanity checking macros defined below
+ * expand to real tests; when FALSE they expand to nothing
+ */
+#define H5PB__DO_SANITY_CHECKS TRUE
+/* NOTE(review): presumably enables the page buffer statistics collection
+ * macros -- confirm against their definitions
+ */
+#define H5PB__COLLECT_PAGE_BUFFER_STATS TRUE
+
+
+/****************************************************************************
+ *
+ * We maintain doubly linked lists of instances of H5PB_entry_t for a
+ * variety of reasons -- LRU list, tick list, and the delayed write list
+ * at present. The following macros support linking and unlinking
+ * of instances of H5PB_entry_t by both their regular and tick list next
+ * and previous pointers. Note that the tick list is only used in the
+ * context of VFD SWMR
+ *
+ * The size and length fields are also maintained.
+ *
+ * Note that the relevant pair of prev and next pointers are presumed to be
+ * NULL on entry in the insertion macros.
+ *
+ * Finally, observe that the sanity checking macros evaluate to the empty
+ * string when H5PB__DO_SANITY_CHECKS is FALSE. They also contain calls
+ * to the HGOTO_ERROR macro, which may not be appropriate in all cases.
+ * If so, we will need versions of the insertion and deletion macros which
+ * do not reference the sanity checking macros.
+ * JRM - 10/07/18
+ *
+ ****************************************************************************/
+
+#if H5PB__DO_SANITY_CHECKS
+
+/* Sanity check run by H5PB__DLL_REMOVE before it unlinks entry_ptr.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if any of the following hold:
+ * head_ptr, tail_ptr or entry_ptr is NULL; len is not positive; Size is
+ * smaller than the entry's size; the entry has no prev (next) pointer but
+ * is not the head (tail); or len is 1 and the entry is not the sole,
+ * unlinked element whose size equals Size.
+ */
+#define H5PB__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+if ( ( (head_ptr) == NULL ) || \
+ ( (tail_ptr) == NULL ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (len) <= 0 ) || \
+ ( (size_t)(Size) < (entry_ptr)->size ) || \
+ ( ( (entry_ptr)->prev == NULL ) && ( (head_ptr) != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
+ ( ( (len) == 1 ) && \
+ ( ! ( ( (head_ptr) == (entry_ptr) ) && \
+ ( (tail_ptr) == (entry_ptr) ) && \
+ ( (entry_ptr)->next == NULL ) && \
+ ( (entry_ptr)->prev == NULL ) && \
+ ( (Size) == (int64_t)((entry_ptr)->size) ) \
+ ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre remove SC failed") \
+}
+
+/* General consistency check of a list described by head_ptr, tail_ptr,
+ * len and Size.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if exactly one of head_ptr and
+ * tail_ptr is NULL, if len or Size is negative, if len is 1 but the head
+ * is not also the tail or its size differs from Size, or if len is at
+ * least 1 but the head has a prev pointer or the tail has a next pointer.
+ */
+#define H5PB__DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
+if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (head_ptr) != (tail_ptr) ) \
+ ) || \
+ ( (len) < 0 ) || \
+ ( (Size) < 0 ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (head_ptr) != (tail_ptr) ) || \
+ ( (head_ptr) == NULL ) || ( (head_ptr)->size != (size_t)(Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (head_ptr) == NULL ) || ( (head_ptr)->prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL sanity check failed") \
+}
+
+/* Sanity check run by the insertion macros (append, prepend, insert
+ * before) ahead of linking entry_ptr into a list.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if entry_ptr is NULL or
+ * already has a next or prev pointer, if exactly one of head_ptr and
+ * tail_ptr is NULL, if len is 1 but the head is not also the tail or its
+ * size differs from Size, or if len is at least 1 but the head has a prev
+ * pointer or the tail has a next pointer.
+ */
+#define H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+if ( ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->next != NULL ) || \
+ ( (entry_ptr)->prev != NULL ) || \
+ ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (head_ptr) != (tail_ptr) ) \
+ ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (head_ptr) != (tail_ptr) ) || \
+ ( (head_ptr) == NULL ) || ( (head_ptr)->size != (size_t)(Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (head_ptr) == NULL ) || ( (head_ptr)->prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \
+}
+
+#else /* H5PB__DO_SANITY_CHECKS */
+
+/* sanity checks disabled -- the checking macros expand to nothing */
+#define H5PB__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)
+#define H5PB__DLL_SC(head_ptr, tail_ptr, len, Size, fv)
+#define H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv)
+
+#endif /* H5PB__DO_SANITY_CHECKS */
+
+
+/* Link entry_ptr in as the new tail of the list described by head_ptr and
+ * tail_ptr (via the regular prev / next pointers), then add one to len and
+ * the entry's size to Size. The pre-insert sanity check runs first and
+ * uses fail_val as its failure value.
+ */
+#define H5PB__DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+{ \
+ H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ if ( (head_ptr) != NULL ) \
+ { \
+ /* non-empty list -- hook the entry on after the current tail */ \
+ (entry_ptr)->prev = (tail_ptr); \
+ (tail_ptr)->next = (entry_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ else \
+ { \
+ /* empty list -- the entry becomes both head and tail */ \
+ (tail_ptr) = (entry_ptr); \
+ (head_ptr) = (entry_ptr); \
+ } \
+ (Size) += (int64_t)((entry_ptr)->size); \
+ (len)++; \
+} /* H5PB__DLL_APPEND() */
+
+/* Link entry_ptr in as the new head of the list described by head_ptr and
+ * tail_ptr (via the regular prev / next pointers), then add one to len and
+ * the entry's size to Size. The pre-insert sanity check runs first and
+ * uses fail_val as its failure value.
+ */
+#define H5PB__DLL_PREPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+{ \
+ H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ if ( (head_ptr) != NULL ) \
+ { \
+ /* non-empty list -- hook the entry on before the current head */ \
+ (entry_ptr)->next = (head_ptr); \
+ (head_ptr)->prev = (entry_ptr); \
+ (head_ptr) = (entry_ptr); \
+ } \
+ else \
+ { \
+ /* empty list -- the entry becomes both head and tail */ \
+ (tail_ptr) = (entry_ptr); \
+ (head_ptr) = (entry_ptr); \
+ } \
+ (Size) += (int64_t)((entry_ptr)->size); \
+ (len)++; \
+} /* H5PB__DLL_PREPEND() */
+
+/* Insert entry_ptr into the list immediately before suc_ptr, using the
+ * regular prev / next pointers, and update len and Size.
+ *
+ * If suc_ptr is NULL the entry is appended; if suc_ptr has no prev pointer
+ * the entry is prepended; otherwise it is linked in between suc_ptr->prev
+ * and suc_ptr. fail_val is the failure value handed to the sanity checks.
+ *
+ * Every use of a macro parameter is parenthesized so that arguments which
+ * are expressions rather than plain identifiers expand correctly.
+ */
+#define H5PB__DLL_INSERT_BEFORE(entry_ptr, suc_ptr, head_ptr, tail_ptr, len, \
+ Size, fail_val) \
+{ \
+ HDassert( ((suc_ptr) == NULL) || \
+ ((suc_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC) ); \
+ \
+ if ( (suc_ptr) == NULL ) \
+ /* list empty or no successor -- append */ \
+ H5PB__DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+ \
+ else if ( (suc_ptr)->prev == NULL ) \
+ /* successor at head of list -- prepend */ \
+ H5PB__DLL_PREPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+ \
+ else /* successor in body of list -- insert before it */ \
+ { \
+ H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ HDassert((suc_ptr)->prev->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ HDassert((suc_ptr)->prev->next == (suc_ptr)); \
+ (entry_ptr)->prev = (suc_ptr)->prev; \
+ (entry_ptr)->prev->next = (entry_ptr); \
+ (entry_ptr)->next = (suc_ptr); \
+ (suc_ptr)->prev = (entry_ptr); \
+ (len)++; \
+ (Size) += (int64_t)((entry_ptr)->size); \
+ } \
+} /* H5PB__DLL_INSERT_BEFORE() */
+
+/* Unlink entry_ptr from the list described by head_ptr and tail_ptr, using
+ * the regular prev / next pointers. The head and / or tail pointer is
+ * updated if the entry was at either end, the entry's own prev and next
+ * pointers are reset to NULL, and len and Size are decremented by one and
+ * by the entry's size respectively. The pre-remove sanity check runs
+ * first and uses fail_val as its failure value.
+ *
+ * Every use of a macro parameter is parenthesized so that arguments which
+ * are expressions rather than plain identifiers expand correctly.
+ */
+#define H5PB__DLL_REMOVE(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val) \
+{ \
+ H5PB__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ { \
+ if ( (head_ptr) == (entry_ptr) ) \
+ { \
+ (head_ptr) = (entry_ptr)->next; \
+ if ( (head_ptr) != NULL ) \
+ (head_ptr)->prev = NULL; \
+ } \
+ else \
+ (entry_ptr)->prev->next = (entry_ptr)->next; \
+ if ( (tail_ptr) == (entry_ptr) ) \
+ { \
+ (tail_ptr) = (entry_ptr)->prev; \
+ if ( (tail_ptr) != NULL ) \
+ (tail_ptr)->next = NULL; \
+ } \
+ else \
+ (entry_ptr)->next->prev = (entry_ptr)->prev; \
+ (entry_ptr)->next = NULL; \
+ (entry_ptr)->prev = NULL; \
+ (len)--; \
+ (Size) -= (int64_t)((entry_ptr)->size); \
+ } \
+} /* H5PB__DLL_REMOVE() */
+
+
+#if H5PB__DO_SANITY_CHECKS
+
+/* Sanity check run by H5PB__TL_DLL_REMOVE before it unlinks entry_ptr from
+ * a list linked through the tl_prev / tl_next pointers.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if any of the following hold:
+ * hd_ptr, tail_ptr or entry_ptr is NULL; len is not positive; Size is
+ * smaller than the entry's size; Size equals the entry's size but len is
+ * not 1; the entry has no tl_prev (tl_next) pointer but is not the head
+ * (tail); or len is 1 and the entry is not the sole, unlinked element
+ * whose size equals Size.
+ *
+ * The single-entry test checks tl_prev, the field used by every other
+ * tick list macro (it previously named a field "tlx_prev").
+ */
+#define H5PB__TL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (hd_ptr) == NULL ) || \
+ ( (tail_ptr) == NULL ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (len) <= 0 ) || \
+ ( (Size) < (entry_ptr)->size ) || \
+ ( ( (Size) == (entry_ptr)->size ) && ( ! ( (len) == 1 ) ) ) || \
+ ( ( (entry_ptr)->tl_prev == NULL ) && ( (hd_ptr) != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->tl_next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
+ ( ( (len) == 1 ) && \
+ ( ! ( ( (hd_ptr) == (entry_ptr) ) && ( (tail_ptr) == (entry_ptr) ) && \
+ ( (entry_ptr)->tl_next == NULL ) && \
+ ( (entry_ptr)->tl_prev == NULL ) && \
+ ( (Size) == (entry_ptr)->size ) \
+ ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL pre remove SC failed") \
+}
+
+/* General consistency check of a list linked through the tl_prev / tl_next
+ * pointers and described by head_ptr, tail_ptr, len and Size.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if exactly one of head_ptr and
+ * tail_ptr is NULL, if len or Size is negative, if len is 1 but the head
+ * is not also the tail, Size is not positive, or the head's size differs
+ * from Size, or if len is at least 1 but the head has a tl_prev pointer or
+ * the tail has a tl_next pointer.
+ */
+#define H5PB__TL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
+if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (head_ptr) != (tail_ptr) ) \
+ ) || \
+ ( (len) < 0 ) || \
+ ( (Size) < 0 ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (head_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \
+ ( (head_ptr) == NULL ) || ( (head_ptr)->size != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (head_ptr) == NULL ) || ( (head_ptr)->tl_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->tl_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL sanity check failed") \
+}
+
+/* Sanity check run by the tick list insertion macros (append, prepend)
+ * ahead of linking entry_ptr in through its tl_prev / tl_next pointers.
+ *
+ * Invokes HGOTO_ERROR with failure value fv if entry_ptr is NULL or
+ * already has a tl_next or tl_prev pointer, if exactly one of hd_ptr and
+ * tail_ptr is NULL, if len is 1 but the head is not also the tail, Size is
+ * not positive, or the head's size differs from Size, or if len is at
+ * least 1 but the head has a tl_prev pointer or the tail has a tl_next
+ * pointer.
+ */
+#define H5PB__TL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->tl_next != NULL ) || \
+ ( (entry_ptr)->tl_prev != NULL ) || \
+ ( ( ( (hd_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (hd_ptr) != (tail_ptr) ) \
+ ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (hd_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \
+ ( (hd_ptr) == NULL ) || ( (int64_t)((hd_ptr)->size) != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (hd_ptr) == NULL ) || ( (hd_ptr)->tl_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->tl_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL pre insert SC failed") \
+}
+
+#else /* H5PB__DO_SANITY_CHECKS */
+
+#define H5PB__TL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+#define H5PB__TL_DLL_SC(head_ptr, tail_ptr, len, Size, fv)
+#define H5PB__TL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+
+#endif /* H5PB__DO_SANITY_CHECKS */
+
+
+/* Append entry_ptr to the tail of a doubly linked list threaded through
+ * the tl_next / tl_prev fields, then increment len and add the entry's
+ * size to Size.
+ *
+ * head_ptr, tail_ptr, len and Size are updated in place, so they must be
+ * lvalues.  fail_val is forwarded to the pre-insert sanity check.
+ */
+#define H5PB__TL_DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val)\
+{ \
+    H5PB__TL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+                               fail_val) \
+    if ( (head_ptr) == NULL ) \
+    { \
+        /* empty list: the entry becomes both head and tail */ \
+        (head_ptr) = (entry_ptr); \
+        (tail_ptr) = (entry_ptr); \
+    } \
+    else \
+    { \
+        (tail_ptr)->tl_next = (entry_ptr); \
+        (entry_ptr)->tl_prev = (tail_ptr); \
+        (tail_ptr) = (entry_ptr); \
+    } \
+    (len)++; \
+    (Size) += (int64_t)((entry_ptr)->size); \
+} /* H5PB__TL_DLL_APPEND() */
+
+/* Insert entry_ptr at the head of a doubly linked list threaded through
+ * the tl_next / tl_prev fields, then increment len and add the entry's
+ * size to Size.
+ *
+ * head_ptr, tail_ptr, len and Size are updated in place, so they must be
+ * lvalues.  fv is forwarded to the pre-insert sanity check.
+ */
+#define H5PB__TL_DLL_PREPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+{ \
+    H5PB__TL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+    if ( (head_ptr) == NULL ) \
+    { \
+        /* empty list: the entry becomes both head and tail */ \
+        (head_ptr) = (entry_ptr); \
+        (tail_ptr) = (entry_ptr); \
+    } \
+    else \
+    { \
+        (head_ptr)->tl_prev = (entry_ptr); \
+        (entry_ptr)->tl_next = (head_ptr); \
+        (head_ptr) = (entry_ptr); \
+    } \
+    (len)++; \
+    (Size) += (int64_t)((entry_ptr)->size); \
+} /* H5PB__TL_DLL_PREPEND() */
+
+/* Unlink entry_ptr from a doubly linked list threaded through the
+ * tl_next / tl_prev fields, clear the entry's two link fields, decrement
+ * len and subtract the entry's size from Size.
+ *
+ * head_ptr, tail_ptr, len and Size are updated in place, so they must be
+ * lvalues.  fv is forwarded to the pre-remove sanity check.
+ */
+#define H5PB__TL_DLL_REMOVE(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+{ \
+    H5PB__TL_DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+    { \
+        /* fix up the forward link that pointed at the entry */ \
+        if ( (head_ptr) == (entry_ptr) ) \
+        { \
+            (head_ptr) = (entry_ptr)->tl_next; \
+            if ( (head_ptr) != NULL ) \
+                (head_ptr)->tl_prev = NULL; \
+        } \
+        else \
+            (entry_ptr)->tl_prev->tl_next = (entry_ptr)->tl_next; \
+        /* fix up the backward link that pointed at the entry */ \
+        if ( (tail_ptr) == (entry_ptr) ) \
+        { \
+            (tail_ptr) = (entry_ptr)->tl_prev; \
+            if ( (tail_ptr) != NULL ) \
+                (tail_ptr)->tl_next = NULL; \
+        } \
+        else \
+            (entry_ptr)->tl_next->tl_prev = (entry_ptr)->tl_prev; \
+        (entry_ptr)->tl_next = NULL; \
+        (entry_ptr)->tl_prev = NULL; \
+        (len)--; \
+        (Size) -= (int64_t)((entry_ptr)->size); \
+    } \
+} /* H5PB__TL_DLL_REMOVE() */
+
+
+/***********************************************************************
+ *
+ * Stats collection macros
+ *
+ * The following macros must handle stats collection when this collection
+ * is enabled, and evaluate to the empty string when it is not.
+ *
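+ * The sole exception to this rule is intended to be
+ * H5PB__UPDATE_PB_HIT_RATE_STATS(), as the page buffer hit rate stats are
+ * meant to be always collected and available.  Note, however, that as
+ * currently written this macro is also defined inside the
+ * H5PB__COLLECT_PAGE_BUFFER_STATS conditional below, and expands to
+ * nothing when stats collection is disabled.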
+ *
+ ***********************************************************************/
+
+#if H5PB__COLLECT_PAGE_BUFFER_STATS
+
+/* Record one page buffer hit or miss.  The slot of the hits[] / misses[]
+ * array is selected from the two flags: H5PB__STATS_MPMDE when both
+ * is_metadata and is_mpmde are set, H5PB__STATS_MD when only is_metadata
+ * is set, and H5PB__STATS_RD otherwise.
+ */
+#define H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, hit, is_metadata, is_mpmde) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+ \
+    stats_slot = (is_metadata) \
+                 ? ( (is_mpmde) ? H5PB__STATS_MPMDE : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+ \
+    if ( hit ) { \
+        ((pb_ptr)->hits[stats_slot])++; \
+    } else { \
+        ((pb_ptr)->misses[stats_slot])++; \
+    } \
+} /* H5PB__UPDATE_PB_HIT_RATE_STATS */
+
+/* Update the high water marks for the index: entry count, total size,
+ * metadata page count, raw data page count, and multi-page metadata entry
+ * count.  Each max_* field is raised to the matching current value when
+ * that value exceeds it.
+ */
+#define H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
+{ \
+    if ( (pb_ptr)->index_len > (pb_ptr)->max_index_len ) \
+        (pb_ptr)->max_index_len = (pb_ptr)->index_len; \
+    if ( (pb_ptr)->index_size > (pb_ptr)->max_index_size ) \
+        (pb_ptr)->max_index_size = (pb_ptr)->index_size; \
+    if ( (pb_ptr)->curr_md_pages > (pb_ptr)->max_md_pages ) \
+        (pb_ptr)->max_md_pages = (pb_ptr)->curr_md_pages; \
+    if ( (pb_ptr)->curr_rd_pages > (pb_ptr)->max_rd_pages ) \
+        (pb_ptr)->max_rd_pages = (pb_ptr)->curr_rd_pages; \
+    if ( (pb_ptr)->mpmde_count > (pb_ptr)->max_mpmde_count ) \
+        (pb_ptr)->max_mpmde_count = (pb_ptr)->mpmde_count; \
+}
+
+#define H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr) \
+ ((pb_ptr)->total_ht_insertions)++;
+
+
+#define H5PB__UPDATE_STATS_FOR_HT_DELETION(pb_ptr) \
+ (pb_ptr)->total_ht_deletions++;
+
+/* Record the outcome of one hash table search.
+ *
+ * success:  non-zero if the search found an entry
+ * depth:    number of chain entries examined before the search ended
+ *
+ * Successful and failed searches are counted separately, each with its
+ * own running total of search depth.
+ */
+#define H5PB__UPDATE_STATS_FOR_HT_SEARCH(pb_ptr, success, depth) \
+{ \
+    HDassert((depth) >= 0); \
+    if ( success ) { \
+        (pb_ptr)->successful_ht_searches++; \
+        (pb_ptr)->total_successful_ht_search_depth += (int64_t)(depth); \
+    } else { \
+        (pb_ptr)->failed_ht_searches++; \
+        (pb_ptr)->total_failed_ht_search_depth += (int64_t)(depth); \
+    } \
+}
+
+#define H5PB__UPDATE_LRU_SIZE_STATS(pb_ptr) \
+ if ( (pb_ptr)->LRU_len > (pb_ptr)->max_lru_len ) \
+ (pb_ptr)->max_lru_len = (pb_ptr)->LRU_len; \
+ if ( (pb_ptr)->LRU_size > (pb_ptr)->max_lru_size ) \
+ (pb_ptr)->max_lru_size = (pb_ptr)->LRU_size;
+
+#define H5PB__UPDATE_STATS_FOR_LRU_MD_SKIP(pb_ptr) \
+ ((pb_ptr)->lru_md_skips)++;
+
+#define H5PB__UPDATE_STATS_FOR_LRU_RD_SKIP(pb_ptr) \
+ ((pb_ptr)->lru_rd_skips)++;
+
+/* Increment pb_ptr->lru_tl_skips.  Asserts that the page buffer is
+ * flagged as a VFD SWMR writer.
+ */
+#define H5PB__UPDATE_STATS_FOR_LRU_TL_SKIP(pb_ptr) \
+{ \
+    HDassert((pb_ptr)->vfd_swmr_writer); \
+    ((pb_ptr)->lru_tl_skips)++; \
+}
+
+#define H5PB__UPDATE_STATS_FOR_LRU_DWL_SKIP(pb_ptr) \
+{ \
+ HDassert((pb_ptr)->vfd_swmr_writer); \
+ ((pb_ptr)->lru_dwl_skips)++; \
+}
+
+#define H5PB__UPDATE_TL_SIZE_STATS(pb_ptr) \
+{ \
+ HDassert((pb_ptr)->vfd_swmr_writer); \
+ if ( (pb_ptr)->tl_len > (pb_ptr)->max_tl_len ) \
+ (pb_ptr)->max_tl_len = (pb_ptr)->tl_len; \
+ if ( (pb_ptr)->tl_size > (pb_ptr)->max_tl_size ) \
+ (pb_ptr)->max_tl_size = (pb_ptr)->tl_size; \
+}
+
+#define H5PB__UPDATE_DWL_SIZE_STATS(pb_ptr) \
+{ \
+ HDassert((pb_ptr)->vfd_swmr_writer); \
+ if ( (pb_ptr)->dwl_len > (pb_ptr)->max_dwl_len ) \
+ (pb_ptr)->max_dwl_len = (pb_ptr)->dwl_len; \
+ if ( (pb_ptr)->dwl_size > (pb_ptr)->max_dwl_size ) \
+ (pb_ptr)->max_dwl_size = (pb_ptr)->dwl_size; \
+}
+
+/* Record one insertion into the delayed write list.
+ *
+ * insertion_depth:  value added to pb_ptr->total_dwl_ins_depth
+ * delay:            value added to pb_ptr->total_delay
+ *
+ * Asserts that the page buffer is flagged as a VFD SWMR writer.
+ */
+#define H5PB__UPDATE_DWL_DELAYED_WRITES(pb_ptr, insertion_depth, delay) \
+{ \
+    HDassert((pb_ptr)->vfd_swmr_writer); \
+    ((pb_ptr)->delayed_writes)++; \
+    (pb_ptr)->total_delay += (delay); \
+    (pb_ptr)->total_dwl_ins_depth += (insertion_depth); \
+}
+
+
+/* Count one access in pb_ptr->accesses[].  The slot is H5PB__STATS_RD
+ * when type is H5FD_MEM_DRAW; otherwise H5PB__STATS_MPMDE when size
+ * exceeds the page size, and H5PB__STATS_MD when it does not.
+ */
+#define H5PB__UPDATE_STATS_FOR_ACCESS(pb_ptr, type, size) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+ \
+    stats_slot = ( H5FD_MEM_DRAW == (type) ) \
+                 ? H5PB__STATS_RD \
+                 : ( ( (size) > (pb_ptr)->page_size ) ? H5PB__STATS_MPMDE \
+                                                      : H5PB__STATS_MD ); \
+    ((pb_ptr)->accesses[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_ACCESS */
+
+
+/* Count one bypass in pb_ptr->bypasses[].  The slot is H5PB__STATS_RD
+ * when type is H5FD_MEM_DRAW; otherwise H5PB__STATS_MPMDE when size
+ * exceeds the page size, and H5PB__STATS_MD when it does not.
+ */
+#define H5PB__UPDATE_STATS_FOR_BYPASS(pb_ptr, type, size) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+ \
+    stats_slot = ( H5FD_MEM_DRAW == (type) ) \
+                 ? H5PB__STATS_RD \
+                 : ( ( (size) > (pb_ptr)->page_size ) ? H5PB__STATS_MPMDE \
+                                                      : H5PB__STATS_MD ); \
+    ((pb_ptr)->bypasses[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_BYPASS */
+
+
+/* Count one flush in pb_ptr->flushes[].  The slot is chosen from the
+ * entry's flags: H5PB__STATS_MPMDE when is_metadata and is_mpmde are both
+ * set, H5PB__STATS_MD when only is_metadata is set, H5PB__STATS_RD
+ * otherwise.
+ */
+#define H5PB__UPDATE_STATS_FOR_FLUSH(pb_ptr, entry_ptr) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+    HDassert(entry_ptr); \
+    HDassert((entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ \
+    stats_slot = ( (entry_ptr)->is_metadata ) \
+                 ? ( ( (entry_ptr)->is_mpmde ) ? H5PB__STATS_MPMDE \
+                                               : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+    ((pb_ptr)->flushes[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_FLUSH */
+
+
+/* Count one eviction in pb_ptr->evictions[].  The slot is chosen from the
+ * entry's flags: H5PB__STATS_MPMDE when is_metadata and is_mpmde are both
+ * set, H5PB__STATS_MD when only is_metadata is set, H5PB__STATS_RD
+ * otherwise.
+ */
+#define H5PB__UPDATE_STATS_FOR_EVICTION(pb_ptr, entry_ptr) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+    HDassert(entry_ptr); \
+    HDassert((entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ \
+    stats_slot = ( (entry_ptr)->is_metadata ) \
+                 ? ( ( (entry_ptr)->is_mpmde ) ? H5PB__STATS_MPMDE \
+                                               : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+    ((pb_ptr)->evictions[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_EVICTION */
+
+
+/* Count one clear in pb_ptr->clears[].  The slot is chosen from the
+ * entry's flags: H5PB__STATS_MPMDE when is_metadata and is_mpmde are both
+ * set, H5PB__STATS_MD when only is_metadata is set, H5PB__STATS_RD
+ * otherwise.
+ */
+#define H5PB__UPDATE_STATS_FOR_CLEAR(pb_ptr, entry_ptr) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+    HDassert(entry_ptr); \
+    HDassert((entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ \
+    stats_slot = ( (entry_ptr)->is_metadata ) \
+                 ? ( ( (entry_ptr)->is_mpmde ) ? H5PB__STATS_MPMDE \
+                                               : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+    ((pb_ptr)->clears[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_CLEAR */
+
+
+/* Count one insertion in pb_ptr->insertions[].  The slot is chosen from
+ * the entry's flags: H5PB__STATS_MPMDE when is_metadata and is_mpmde are
+ * both set, H5PB__STATS_MD when only is_metadata is set, H5PB__STATS_RD
+ * otherwise.
+ */
+#define H5PB__UPDATE_STATS_FOR_INSERTION(pb_ptr, entry_ptr) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+    HDassert(entry_ptr); \
+    HDassert((entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ \
+    stats_slot = ( (entry_ptr)->is_metadata ) \
+                 ? ( ( (entry_ptr)->is_mpmde ) ? H5PB__STATS_MPMDE \
+                                               : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+    ((pb_ptr)->insertions[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_INSERTION */
+
+/* Count one load in pb_ptr->loads[].  The slot is chosen from the
+ * entry's flags: H5PB__STATS_MPMDE when is_metadata and is_mpmde are both
+ * set, H5PB__STATS_MD when only is_metadata is set, H5PB__STATS_RD
+ * otherwise.
+ */
+#define H5PB__UPDATE_STATS_FOR_LOAD(pb_ptr, entry_ptr) \
+{ \
+    int stats_slot; \
+ \
+    HDassert(pb_ptr); \
+    HDassert((pb_ptr)->magic == H5PB__H5PB_T_MAGIC); \
+    HDassert(entry_ptr); \
+    HDassert((entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC); \
+ \
+    stats_slot = ( (entry_ptr)->is_metadata ) \
+                 ? ( ( (entry_ptr)->is_mpmde ) ? H5PB__STATS_MPMDE \
+                                               : H5PB__STATS_MD ) \
+                 : H5PB__STATS_RD; \
+    ((pb_ptr)->loads[stats_slot])++; \
+} /* H5PB__UPDATE_STATS_FOR_LOAD */
+
+#else /* H5PB__COLLECT_PAGE_BUFFER_STATS */
+
+#define H5PB__UPDATE_PB_HIT_RATE_STATS(pb_ptr, hit, is_metadata, is_mpmde)
+#define H5PB__UPDATE_HT_SIZE_STATS(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_HT_DELETION(pb_ptr)
+/* Empty stand-in for the hash table search stats macro; the name must match
+ * the one defined when stats are enabled and used by H5PB__SEARCH_INDEX.
+ */
+#define H5PB__UPDATE_STATS_FOR_HT_SEARCH(pb_ptr, success, depth)
+#define H5PB__UPDATE_LRU_SIZE_STATS(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_LRU_MD_SKIP(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_LRU_RD_SKIP(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_LRU_TL_SKIP(pb_ptr)
+#define H5PB__UPDATE_STATS_FOR_LRU_DWL_SKIP(pb_ptr)
+#define H5PB__UPDATE_TL_SIZE_STATS(pb_ptr)
+#define H5PB__UPDATE_DWL_SIZE_STATS(pb_ptr)
+#define H5PB__UPDATE_DWL_DELAYED_WRITES(pb_ptr, insertion_depth, delay)
+#define H5PB__UPDATE_STATS_FOR_ACCESS(pb_ptr, type, size)
+#define H5PB__UPDATE_STATS_FOR_BYPASS(pb_ptr, type, size)
+#define H5PB__UPDATE_STATS_FOR_FLUSH(pb_ptr, entry_ptr)
+#define H5PB__UPDATE_STATS_FOR_EVICTION(pb_ptr, entry_ptr)
+#define H5PB__UPDATE_STATS_FOR_CLEAR(pb_ptr, entry_ptr)
+#define H5PB__UPDATE_STATS_FOR_INSERTION(pb_ptr, entry_ptr)
+#define H5PB__UPDATE_STATS_FOR_LOAD(pb_ptr, entry_ptr)
+
+#endif /* H5PB__COLLECT_PAGE_BUFFER_STATS */
+
+
+/***********************************************************************
+ *
+ * Hash table access and manipulation macros:
+ *
+ * The following macros handle searches, insertions, and deletion in
+ * the hash table.
+ *
+ * Note that the input to the hash function is the page of the page
+ * buffer entry, not its address (recall that (page * page_size) == addr).
+ *
+ * JRM -- 10/09/18
+ *
+ * Changes:
+ *
+ * - None
+ *
+ ***********************************************************************/
+
+#define H5PB__HASH_MASK ((uint64_t)(H5PB__HASH_TABLE_LEN - 1))
+
+#define H5PB__HASH_FCN(x) (int)(((uint64_t)(x)) & H5PB__HASH_MASK)
+
+#if H5PB__DO_SANITY_CHECKS
+
+#define H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
+ ( (entry_ptr)->ht_next != NULL ) || \
+ ( (entry_ptr)->ht_prev != NULL ) || \
+ ( (entry_ptr)->size < pb_ptr->page_size ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) < 0 ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) >= H5PB__HASH_TABLE_LEN ) || \
+ ( (pb_ptr)->index_len < 0 ) || \
+ ( (pb_ptr)->index_size < 0 ) || \
+ ( (pb_ptr)->curr_pages < 0 ) || \
+ ( (pb_ptr)->curr_rd_pages < 0 ) || \
+ ( (pb_ptr)->curr_md_pages < 0 ) || \
+ ( ((pb_ptr)->curr_pages != \
+ ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
+ ( (pb_ptr)->mpmde_count < 0 ) || \
+ ( (pb_ptr)->index_len != \
+ ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
+ HDassert(FALSE); \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "pre HT insert SC failed") \
+}
+
+#define H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
+ ( (pb_ptr)->index_len < 1 ) || \
+ ( (pb_ptr)->index_len != \
+ ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) || \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) ) { \
+ HDassert(FALSE); \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "post HT insert SC failed") \
+}
+
+/* Sanity check run before an entry is removed from the hash table.
+ *
+ * Verifies that the page buffer and entry are non-NULL with valid magic
+ * values, that the index is non-empty and its total size is at least the
+ * entry's size, that the entry is at least one page in size, that the
+ * entry's hash bucket is non-empty, and that the entry has a NULL ht_prev
+ * exactly when it is the first entry of its bucket.
+ *
+ * The macro takes no failure value: on failure it invokes HGOTO_ERROR
+ * with FAIL.
+ */
+#define H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+if ( ( (pb_ptr) == NULL ) || \
+     ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+     ( (pb_ptr)->index_len < 1 ) || \
+     ( (entry_ptr) == NULL ) || \
+     ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
+     ( (entry_ptr)->size < (pb_ptr)->page_size ) || \
+     ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
+     ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
+       == NULL ) || \
+     ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
+         != (entry_ptr) ) && \
+       ( (entry_ptr)->ht_prev == NULL ) ) || \
+     ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] == \
+         (entry_ptr) ) && \
+       ( (entry_ptr)->ht_prev != NULL ) ) ) { \
+    HDassert(FALSE); \
+    HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT remove SC failed") \
+}
+
+/* Sanity check run after an entry has been removed from the hash table.
+ *
+ * Verifies that the page buffer and entry are non-NULL with valid magic
+ * values, that the entry is at least one page in size, that both of the
+ * entry's hash chain links (ht_next and ht_prev) have been cleared, and
+ * that the index counters are non-negative and mutually consistent:
+ *
+ *    curr_pages == curr_md_pages + curr_rd_pages
+ *    index_len  == curr_pages + mpmde_count
+ *
+ * The macro takes no failure value: on failure it invokes HGOTO_ERROR
+ * with FAIL.
+ */
+#define H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+if ( ( (pb_ptr) == NULL ) || \
+     ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+     ( (entry_ptr) == NULL ) || \
+     ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
+     ( (entry_ptr)->size < (pb_ptr)->page_size ) || \
+     ( (entry_ptr)->ht_next != NULL ) || \
+     ( (entry_ptr)->ht_prev != NULL ) || \
+     ( (pb_ptr)->index_len < 0 ) || \
+     ( (pb_ptr)->index_size < 0 ) || \
+     ( (pb_ptr)->curr_pages < 0 ) || \
+     ( (pb_ptr)->curr_rd_pages < 0 ) || \
+     ( (pb_ptr)->curr_md_pages < 0 ) || \
+     ( ((pb_ptr)->curr_pages != \
+        ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
+     ( (pb_ptr)->mpmde_count < 0 ) || \
+     ( (pb_ptr)->index_len != \
+       ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
+    HDassert(FALSE); \
+    HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "post HT remove SC failed") \
+}
+
+#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, page, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( H5PB__HASH_FCN(page) < 0 ) || \
+ ( H5PB__HASH_FCN(page) >= H5PB__HASH_TABLE_LEN ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "pre HT search SC failed") \
+}
+
+#define H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_len < 1 ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
+ ( (pb_ptr)->index_len < 1 ) || \
+ ( (entry_ptr)->size < (pb_ptr)->page_size ) || \
+ ( ( k < 0 ) || ( k >= H5PB__HASH_TABLE_LEN ) ) || \
+ ( ((pb_ptr)->ht)[k] == NULL ) || \
+ ( ( ((pb_ptr)->ht)[k] != (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev == NULL ) ) || \
+ ( ( ((pb_ptr)->ht)[k] == (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev != NULL ) ) || \
+ ( ( (entry_ptr)->ht_prev != NULL ) && \
+ ( (entry_ptr)->ht_prev->ht_next != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->ht_next != NULL ) && \
+ ( (entry_ptr)->ht_next->ht_prev != (entry_ptr) ) ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \
+ "post successful HT search SC failed") \
+}
+
+#define H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( ((pb_ptr)->ht)[k] != (entry_ptr) ) || \
+ ( (entry_ptr)->ht_prev != NULL ) ) { \
+ HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \
+ "post HT shift to front SC failed") \
+}
+
+#else /* H5PB__DO_SANITY_CHECKS */
+
+#define H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val)
+#define H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val)
+#define H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr)
+#define H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr)
+#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, page, fail_val)
+#define H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val)
+#define H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val)
+
+#endif /* H5PB__DO_SANITY_CHECKS */
+
+/* Insert entry_ptr into the page buffer's hash table.
+ *
+ * The entry is placed at the front of the bucket selected by
+ * H5PB__HASH_FCN() applied to the entry's page.  index_len and index_size
+ * are then increased, followed by the per-kind counters: mpmde_count for
+ * a multi-page metadata entry, or curr_md_pages / curr_rd_pages together
+ * with curr_pages for a metadata / raw data page.
+ *
+ * fail_val is handed to the pre- and post-insert sanity check macros.
+ */
+#define H5PB__INSERT_IN_INDEX(pb_ptr, entry_ptr, fail_val) \
+{ \
+ int k; \
+ H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+ k = H5PB__HASH_FCN((entry_ptr)->page); \
+ if(((pb_ptr)->ht)[k] != NULL) { \
+ (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr); \
+ } \
+ ((pb_ptr)->ht)[k] = (entry_ptr); \
+ (pb_ptr)->index_len++; \
+ (pb_ptr)->index_size += (int64_t)((entry_ptr)->size); \
+ if ( (entry_ptr)->is_metadata ) { \
+ if ( (entry_ptr)->is_mpmde ) { \
+ ((pb_ptr)->mpmde_count)++; \
+ } else { \
+ ((pb_ptr)->curr_md_pages)++; \
+ (pb_ptr)->curr_pages++; \
+ } \
+ } else { \
+ ((pb_ptr)->curr_rd_pages)++; \
+ (pb_ptr)->curr_pages++; \
+ } \
+ H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr) \
+ H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
+ H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+}
+
+/* Remove entry_ptr from the page buffer's hash table.
+ *
+ * The entry is unlinked from its bucket chain (the bucket head is advanced
+ * if the entry was first), its ht_next / ht_prev fields are cleared, and
+ * index_len, index_size and the per-kind counters (mpmde_count, or
+ * curr_md_pages / curr_rd_pages together with curr_pages) are decreased.
+ *
+ * NOTE(review): fail_val is not used by this macro.  The pre- and
+ * post-remove sanity check macros take no failure value and pass FAIL to
+ * HGOTO_ERROR, so this macro is only correct in code whose failure value
+ * is FAIL.
+ */
+#define H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, fail_val) \
+{ \
+ int k; \
+ H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+ k = H5PB__HASH_FCN((entry_ptr)->page); \
+ if((entry_ptr)->ht_next) \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
+ if((entry_ptr)->ht_prev) \
+ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
+ if(((pb_ptr)->ht)[k] == (entry_ptr)) \
+ ((pb_ptr)->ht)[k] = (entry_ptr)->ht_next; \
+ (entry_ptr)->ht_next = NULL; \
+ (entry_ptr)->ht_prev = NULL; \
+ (pb_ptr)->index_len--; \
+ (pb_ptr)->index_size -= (int64_t)((entry_ptr)->size); \
+ if ( (entry_ptr)->is_metadata ) { \
+ if ( (entry_ptr)->is_mpmde ) { \
+ ((pb_ptr)->mpmde_count)--; \
+ } else { \
+ ((pb_ptr)->curr_md_pages)--; \
+ (pb_ptr)->curr_pages--; \
+ } \
+ } else { \
+ ((pb_ptr)->curr_rd_pages)--; \
+ (pb_ptr)->curr_pages--; \
+ } \
+ H5PB__UPDATE_STATS_FOR_HT_DELETION(pb_ptr) \
+ H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+}
+
+/* Search the page buffer's hash table for the entry whose page is pg.
+ *
+ * entry_ptr is an output: on completion it points to the matching entry,
+ * or is NULL if the bucket chain was exhausted without a match.  It must
+ * therefore be an assignable pointer variable.
+ *
+ * On a hit, an entry that is not already first in its bucket is unlinked
+ * and moved to the front of the bucket chain.  depth counts the entries
+ * skipped before the search ended and is passed, with the hit / miss
+ * result, to H5PB__UPDATE_STATS_FOR_HT_SEARCH().
+ *
+ * f_val is handed to the search sanity check macros.
+ */
+#define H5PB__SEARCH_INDEX(pb_ptr, pg, entry_ptr, f_val) \
+{ \
+ int k; \
+ int depth = 0; \
+ H5PB__PRE_HT_SEARCH_SC((pb_ptr), (pg), (f_val)) \
+ k = H5PB__HASH_FCN((pg)); \
+ entry_ptr = ((pb_ptr)->ht)[k]; \
+ while ( entry_ptr ) { \
+ if ( (pg) == (entry_ptr)->page ) { \
+ H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, f_val) \
+ if ( entry_ptr != ((pb_ptr)->ht)[k] ) { \
+ if ( (entry_ptr)->ht_next ) \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
+ HDassert((entry_ptr)->ht_prev != NULL); \
+ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
+ ((pb_ptr)->ht)[k]->ht_prev = (entry_ptr); \
+ (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
+ (entry_ptr)->ht_prev = NULL; \
+ ((pb_ptr)->ht)[k] = (entry_ptr); \
+ H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, f_val) \
+ } \
+ break; \
+ } \
+ (entry_ptr) = (entry_ptr)->ht_next; \
+ (depth)++; \
+ } \
+ H5PB__UPDATE_STATS_FOR_HT_SEARCH(pb_ptr, (entry_ptr != NULL), depth) \
+}
+
+
+/***********************************************************************
+ *
+ * Replacement policy update macros
+ *
+ * The following macros handle updates to the replacement policy for
+ * insertions, flushes, and evictions.
+ *
+ * At present, the only replacement policy is a modified LRU policy.
+ *
+ * JRM -- 10/09/18
+ *
+ ***********************************************************************/
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__UPDATE_RP_FOR_EVICTION
+ *
+ * Purpose: Update the replacement policy data structures for an
+ * eviction of the specified page buffer entry.
+ *
+ * At present, we only support the modified LRU policy, so
+ * this function deals with that case unconditionally. If
+ * we ever support other replacement policies, the function
+ * should switch on the current policy and act accordingly.
+ *
+ * Return: Non-negative on success/Negative on failure.
+ *
+ * Programmer: John Mainzer, 10/09/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__UPDATE_RP_FOR_EVICTION(pb_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (pb_ptr) ); \
+ HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+ HDassert( !((entry_ptr)->is_dirty) ); \
+ HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ \
+ /* modified LRU specific code */ \
+ \
+ /* remove the entry from the LRU list. */ \
+ \
+ H5PB__DLL_REMOVE((entry_ptr), (pb_ptr)->LRU_head_ptr, \
+ (pb_ptr)->LRU_tail_ptr, (pb_ptr)->LRU_len, \
+ (pb_ptr)->LRU_size, (fail_val)) \
+ \
+ /* End modified LRU specific code. */ \
+ \
+} /* H5PB__UPDATE_RP_FOR_EVICTION */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__UPDATE_RP_FOR_ACCESS
+ *
+ * Purpose: Update the replacement policy data structures for an
+ * access of the specified page buffer entry.
+ *
+ * At present, we only support the modified LRU policy, so
+ * this function deals with that case unconditionally. If
+ * we ever support other replacement policies, the function
+ * should switch on the current policy and act accordingly.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 10/09/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (pb_ptr) ); \
+ HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+ HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ \
+ /* modified LRU specific code */ \
+ \
+ /* Move entry to the head of the LRU */ \
+ \
+ H5PB__DLL_REMOVE((entry_ptr), (pb_ptr)->LRU_head_ptr, \
+ (pb_ptr)->LRU_tail_ptr, (pb_ptr)->LRU_len, \
+ (pb_ptr)->LRU_size, (fail_val)) \
+ \
+ H5PB__DLL_PREPEND((entry_ptr), (pb_ptr)->LRU_head_ptr, \
+ (pb_ptr)->LRU_tail_ptr, (pb_ptr)->LRU_len, \
+ (pb_ptr)->LRU_size, (fail_val)) \
+ \
+ /* End modified LRU specific code. */ \
+ \
+} /* H5PB__UPDATE_RP_FOR_ACCESS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__UPDATE_RP_FOR_FLUSH
+ *
+ * Purpose: Update the replacement policy data structures for a flush
+ * of the specified page buffer entry.
+ *
+ * At present, we only support the modified LRU policy, so
+ * this function deals with that case unconditionally. If
+ * we ever support other replacement policies, the function
+ * should switch on the current policy and act accordingly.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 10/09/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__UPDATE_RP_FOR_FLUSH(pb_ptr, entry_ptr, fail_val) \
+{ \
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, fail_val) \
+ \
+} /* H5PB__UPDATE_RP_FOR_FLUSH */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__UPDATE_RP_FOR_INSERT_APPEND
+ *
+ * Purpose: Update the replacement policy data structures for an
+ * insertion of the specified cache entry.
+ *
+ * Unlike H5PB__UPDATE_RP_FOR_INSERTION below, mark the
+ * new entry as the LEAST recently used entry, not the
+ * most recently used.
+ *
+ * At present, we only support the modified LRU policy, so
+ * this function deals with that case unconditionally. If
+ * we ever support other replacement policies, the function
+ * should switch on the current policy and act accordingly.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__UPDATE_RP_FOR_INSERT_APPEND(pb_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (pb_ptr) ); \
+ HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+ HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ \
+ /* modified LRU specific code */ \
+ \
+ /* insert the entry at the tail of the LRU list. */ \
+ \
+ H5PB__DLL_APPEND((entry_ptr), (pb_ptr)->LRU_head_ptr, \
+ (pb_ptr)->LRU_tail_ptr, (pb_ptr)->LRU_len, \
+ (pb_ptr)->LRU_size, (fail_val)) \
+ \
+ H5PB__UPDATE_LRU_SIZE_STATS(pb_ptr) \
+ \
+ /* End modified LRU specific code. */ \
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__UPDATE_RP_FOR_INSERTION
+ *
+ * Purpose: Update the replacement policy data structures for an
+ * insertion of the specified cache entry.
+ *
+ * At present, we only support the modified LRU policy, so
+ * this function deals with that case unconditionally. If
+ * we ever support other replacement policies, the function
+ * should switch on the current policy and act accordingly.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__UPDATE_RP_FOR_INSERTION(pb_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (pb_ptr) ); \
+ HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+ HDassert( (entry_ptr) ); \
+ HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+ HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ \
+ /* modified LRU specific code */ \
+ \
+ /* insert the entry at the head of the LRU list. */ \
+ \
+ H5PB__DLL_PREPEND((entry_ptr), (pb_ptr)->LRU_head_ptr, \
+ (pb_ptr)->LRU_tail_ptr, (pb_ptr)->LRU_len, \
+ (pb_ptr)->LRU_size, (fail_val)) \
+ \
+ H5PB__UPDATE_LRU_SIZE_STATS(pb_ptr) \
+ \
+ /* End modified LRU specific code. */ \
+}
+
+
+/***********************************************************************
+ *
+ * Tick list management macros
+ *
+ * When the target file is opened in VFD SWMR writer mode, the page
+ * buffer must retain copies of all metadata writes during each tick so
+ * that the metadata file can be updated correctly in end of tick
+ * processing.
+ *
+ * Once tick processing is complete, all entries are removed from the
+ * tick list, to leave it empty for the next tick. Metadata pages from
+ * the tick list are already in the replacement policy, and thus require
+ * no further action.
+ *
+ * Multi-page metadata entries are evicted from the page buffer if they
+ * are not subject to delayed write, or left in the delayed write list
+ * for later flush and eviction if they are.
+ *
+ * The macros required to support this are defined below.
+ *
+ * JRM -- 10/09/18
+ *
+ ***********************************************************************/
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__INSERT_IN_TL
+ *
+ * Purpose: Insert the specified page buffer entry at the head of the
+ * tick list.
+ *
+ * Return: N/A
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__INSERT_IN_TL(pb_ptr, entry_ptr, fail_val) \
+{ \
+ HDassert( (pb_ptr) ); \
+ HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+ HDassert( (pb_ptr)->vfd_swmr_writer ); \
+ HDassert( (entry_ptr) ); \
+ HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+ HDassert( (entry_ptr)->modified_this_tick ); \
+ HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ \
+ /* insert the entry at the head of the tick list. */ \
+ \
+ H5PB__TL_DLL_PREPEND((entry_ptr), (pb_ptr)->tl_head_ptr, \
+ (pb_ptr)->tl_tail_ptr, (pb_ptr)->tl_len, \
+ (pb_ptr)->tl_size, (fail_val)) \
+ \
+ H5PB__UPDATE_TL_SIZE_STATS(pb_ptr) \
+ \
+} /* H5PB__INSERT_IN_TL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__REMOVE_FROM_TL
+ *
+ * Purpose: Remove the specified page buffer entry from the tick list.
+ *
+ *          The entry is unlinked via H5PB__TL_DLL_REMOVE(), which also
+ *          updates pb_ptr->tl_len and pb_ptr->tl_size.
+ *
+ *          The macro asserts that the page buffer is flagged as a VFD
+ *          SWMR writer, that the entry is marked as modified this tick,
+ *          and that the entry is at least one page in size.
+ *
+ * Return: N/A (fail_val is handed to the list removal macro for use
+ *         by its sanity checks).
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__REMOVE_FROM_TL(pb_ptr, entry_ptr, fail_val) \
+{ \
+    HDassert( (pb_ptr) ); \
+    HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+    HDassert( (pb_ptr)->vfd_swmr_writer ); \
+    HDassert( (entry_ptr) ); \
+    HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+    HDassert( (entry_ptr)->modified_this_tick ); \
+    HDassert( (entry_ptr)->size >= (pb_ptr)->page_size ); \
+ \
+    /* remove the entry from the tick list. */ \
+ \
+    H5PB__TL_DLL_REMOVE((entry_ptr), (pb_ptr)->tl_head_ptr, \
+                        (pb_ptr)->tl_tail_ptr, (pb_ptr)->tl_len, \
+                        (pb_ptr)->tl_size, (fail_val)) \
+ \
+} /* H5PB__REMOVE_FROM_TL */
+
+
+/***********************************************************************
+ *
+ * Delayed write list management macros
+ *
+ * When the target file is opened in VFD SWMR writer mode, the page
+ * buffer must delay flush of all metadata pages and multi-page metadata
+ * entries that:
+ *
+ * 1) have not appeared in the metadata file index for at least max_lag
+ * ticks, and
+ *
+ * 2) a previous version of the metadata page or multi-page metadata
+ * cache entry exists in the file.
+ *
+ * Failure to do so can cause VFD SWMR readers to receive messages
+ * from the future.
+ *
+ * To minimize overhead, the delayed write list is sorted in decreasing
+ * values of the constituent delay_write_until fields.
+ *
+ * Entries are removed from the delayed write list when their
+ * delay_write_until fields are satisfied. Metadata pages are inserted
+ * at the bottom of the replacement policy, and multi-page metadata
+ * entries are immediately flushed and evicted.
+ *
+ * The macros required to support this are defined below.
+ *
+ * JRM -- 10/09/18
+ *
+ ***********************************************************************/
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__INSERT_IN_DWL
+ *
+ * Purpose: Insert the supplied page buffer entry in the delayed write
+ *          list, maintaining the invariant:
+ *
+ *            entry_ptr->next == NULL ||
+ *            entry_ptr->delay_write_until >=
+ *                entry_ptr->next->delay_write_until
+ *
+ *          The list is walked from its head, skipping every entry whose
+ *          delay_write_until is strictly greater than that of the new
+ *          entry; the new entry is inserted before the first entry not
+ *          skipped (or at the end of the list if all were skipped).
+ *
+ *          In passing, update pb_ptr->max_delay if the new entry's
+ *          delay_write_until exceeds it, and update the delayed write
+ *          list statistics.
+ *
+ *          The macro asserts that the page buffer is flagged as a VFD
+ *          SWMR writer, that the entry is at least one page in size, and
+ *          that the entry's delay_write_until is greater than the current
+ *          tick.
+ *
+ * Return: N/A (fail_val is handed to the list insertion macro).
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__INSERT_IN_DWL(pb_ptr, entry_ptr, fail_val) \
+{ \
+    int insertion_depth = 0; \
+    uint64_t delay; \
+    H5PB_entry_t * suc_ptr; \
+ \
+    HDassert( (pb_ptr) ); \
+    HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+    HDassert( (pb_ptr)->vfd_swmr_writer ); \
+    HDassert( (entry_ptr) ); \
+    HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+    HDassert( (entry_ptr)->size >= (pb_ptr)->page_size ); \
+    HDassert( (entry_ptr)->delay_write_until > (pb_ptr)->cur_tick ); \
+ \
+    /* number of ticks the write is delayed -- used for stats only */ \
+    delay = (entry_ptr)->delay_write_until - (pb_ptr)->cur_tick; \
+    suc_ptr = (pb_ptr)->dwl_head_ptr; \
+ \
+    /* find the successor of the new entry in the sorted list */ \
+    while ( (suc_ptr) && \
+            ((suc_ptr)->delay_write_until > (entry_ptr)->delay_write_until) ) \
+    { \
+        insertion_depth++; \
+        suc_ptr = suc_ptr->next; \
+    } \
+ \
+    H5PB__DLL_INSERT_BEFORE((entry_ptr), (suc_ptr), (pb_ptr)->dwl_head_ptr, \
+                            (pb_ptr)->dwl_tail_ptr, (pb_ptr)->dwl_len, \
+                            (pb_ptr)->dwl_size, (fail_val)) \
+ \
+    if ( (entry_ptr)->delay_write_until > (pb_ptr)->max_delay ) \
+        (pb_ptr)->max_delay = (entry_ptr)->delay_write_until; \
+ \
+    H5PB__UPDATE_DWL_SIZE_STATS(pb_ptr) \
+    H5PB__UPDATE_DWL_DELAYED_WRITES(pb_ptr, insertion_depth, delay) \
+ \
+} /* H5PB__INSERT_IN_DWL */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Macro: H5PB__REMOVE_FROM_DWL
+ *
+ * Purpose: Remove the specified page buffer entry from the delayed
+ *          write list.
+ *
+ *          The delayed write list is built by H5PB__INSERT_IN_DWL via
+ *          H5PB__DLL_INSERT_BEFORE(), i.e. it is threaded through the
+ *          entries' next / prev fields.  The entry is therefore unlinked
+ *          with H5PB__DLL_REMOVE(), which also updates pb_ptr->dwl_len
+ *          and pb_ptr->dwl_size -- not with the tick list variant, which
+ *          operates on the tl_next / tl_prev fields.
+ *
+ *          The macro asserts that the page buffer is flagged as a VFD
+ *          SWMR writer, that the entry is at least one page in size, and
+ *          that the entry's delay_write_until is less than the current
+ *          tick.
+ *
+ * Return: N/A (fail_val is handed to the list removal macro).
+ *
+ * Programmer: John Mainzer, 10/10/18
+ *
+ * Modifications:
+ *
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#define H5PB__REMOVE_FROM_DWL(pb_ptr, entry_ptr, fail_val) \
+{ \
+    HDassert( (pb_ptr) ); \
+    HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
+    HDassert( (pb_ptr)->vfd_swmr_writer ); \
+    HDassert( (entry_ptr) ); \
+    HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
+    HDassert( (entry_ptr)->size >= (pb_ptr)->page_size ); \
+    HDassert( (entry_ptr)->delay_write_until < (pb_ptr)->cur_tick ); \
+ \
+    /* remove the entry from the delayed write list. */ \
+ \
+    H5PB__DLL_REMOVE((entry_ptr), (pb_ptr)->dwl_head_ptr, \
+                     (pb_ptr)->dwl_tail_ptr, (pb_ptr)->dwl_len, \
+                     (pb_ptr)->dwl_size, (fail_val)) \
+ \
+} /* H5PB__REMOVE_FROM_DWL */
+
+
/****************************/
/* Package Private Typedefs */
/****************************/
-typedef struct H5PB_entry_t {
- void *page_buf_ptr; /* Pointer to the buffer containing the data */
- haddr_t addr; /* Address of the page in the file */
- H5F_mem_page_t type; /* Type of the page entry (H5F_MEM_PAGE_RAW/META) */
- hbool_t is_dirty; /* Flag indicating whether the page has dirty data or not */
+/****************************************************************************
+ *
+ * structure H5PB_entry_t
+ *
+ * Individual instances of the H5PB_entry_t structure are used to manage
+ * individual pages in the page buffer. In the case of a VFD SWMR writer,
+ * they are also used to manage multi-page metadata entries.
+ *
+ * The fields of this structure are discussed below:
+ *
+ * JRM - 9/27/18
+ *
+ * magic: Unsigned 32 bit integer that must always be set to
+ * H5PB__H5PB_ENTRY_T_MAGIC when the entry is valid.
+ *
+ * pb_ptr: Pointer to the page buffer that contains this entry.
+ *
+ * addr: Base address of the page in the file.
+ *
+ * page: Page offset of the page -- i.e. addr / pb_ptr->page_size.
+ * Note that addr must always equal page * pb_ptr->page_size.
+ *
+ * size: Size of the page buffer entry in bytes. Under normal
+ * circumstance, this will always be equal to pb_ptr->page_size.
+ * However, in the context of a VFD SWMR writer, the page
+ * buffer may be used to store multi-page metadata entries
+ * until the end of tick, or to delay writes of such entries
+ * for up to max_lag ticks.
+ *
+ * In such cases, size must be greater than pb_ptr->page_size.
+ *
+ * image_ptr: Pointer to void. When not NULL, this field points to a
+ * dynamically allocated block of size bytes in which the
+ * on disk image of the page is stored. In the context of VFD SWMR,
+ * it points to the image of the multi-page metadata entry.
+ *
+ * mem_type: Type (H5FD_mem_t) of the page buffer entry. This value
+ * is needed when reading or writing the entry from/to file.
+ *
+ * is_metadata: Boolean flag that is set to TRUE iff the associated
+ * entry is a page of metadata (or, in the context of VFD
+ * SWMR, a multi-page metadata entry).
+ *
+ * is_dirty: Boolean flag indicating whether the contents of the page
+ * buffer entry has been modified since the last time it
+ * was written to disk.
+ *
+ *
+ * Fields supporting the hash table:
+ *
+ * Entries in the page buffer are indexed by a more or less conventional
+ * hash table with chaining (see header comment on H5PB_t for further details).
+ * If there are multiple entries in any hash bin, they are stored in a doubly
+ * linked list.
+ *
+ * ht_next: Next pointer used by the hash table to store multiple
+ * entries in a single hash bin. This field points to the
+ * next entry in the doubly linked list of entries in the
+ * hash bin, or NULL if there is no next entry.
+ *
+ * ht_prev: Prev pointer used by the hash table to store multiple
+ * entries in a single hash bin. This field points to the
+ * previous entry in the doubly linked list of entries in
+ * the hash bin, or NULL if there is no previous entry.
+ *
+ *
+ * Fields supporting replacement policies:
+ *
+ * The page buffer must have a replacement policy, and it will usually be
+ * necessary for this structure to contain fields supporting that policy.
+ *
+ * At present, only a modified LRU replacement policy is contemplated,
+ * (see header comment for H5PB_t for details), for which the following
+ * fields are adequate.
+ *
+ * next: Next pointer in either the LRU, or (in the context of
+ * VFD SWMR) the delayed write list. If there is no next entry
+ * on the list, this field should be set to NULL.
+ *
+ * prev: Prev pointer in either the LRU, or (in the context of
+ * VFD SWMR) the delayed write list. If there is no previous
+ * entry on the list, this field should be set to NULL.
+ *
+ * Fields supporting VFD SWMR:
+ *
+ * is_mpmde: Boolean flag that is set to TRUE iff the entry
+ * is a multi-page metadata entry. In the absence of VFD
+ * SWMR, the field should always be set to FALSE.
+ *
+ * Observe that:
+ *
+ * is_mpmde <==> is_metadata && size > pb_ptr->page_size
+ *
+ * loaded: Boolean flag that is set to TRUE iff the entry was loaded
+ * from file. This is a necessary input in determining
+ * whether the write of the entry must be delayed.
+ *
+ * This field is only maintained in the VFD SWMR case
+ * and should be false otherwise.
+ *
+ * modified_this_tick: This field is set to TRUE iff pb_ptr->vfd_swmr_writer
+ * and the entry has been modified in the current tick. If
+ * modified_this_tick is TRUE, the entry must also be in the
+ * tick list.
+ *
+ * delay_write_until: Unsigned 64 bit integer containing the first tick
+ * in which the entry may be written to file, or 0 if there
+ * is no such constraint. It should be set to 0 when VFD
+ * SWMR is not enabled.
+ *
+ * tl_next: Next pointer on the list of entries modified in the current
+ * tick. If the entry is not on the tick list, or if there is
+ * no next entry on the list, this field should be set to NULL.
+ *
+ * tl_prev: Prev pointer on the list of entries modified in the current
+ * tick. If the entry is not on the tick list, or if there is
+ * no previous entry on the list, this field should be set to
+ * NULL.
+ *
+ ****************************************************************************/
+
- /* Fields supporting replacement policies */
- struct H5PB_entry_t *next; /* next pointer in the LRU list */
- struct H5PB_entry_t *prev; /* previous pointer in the LRU list */
-} H5PB_entry_t;
+#define H5PB__H5PB_ENTRY_T_MAGIC 0x02030405 /* value of H5PB_entry_t.magic when the entry is valid */
+struct H5PB_entry_t {
-/*****************************/
-/* Package Private Variables */
-/*****************************/
+ uint32_t magic; /* must be H5PB__H5PB_ENTRY_T_MAGIC when the entry is valid */
+ H5PB_t *pb_ptr; /* page buffer that contains this entry */
+ haddr_t addr; /* base address of the page in the file */
+ uint64_t page; /* page offset, i.e. addr / pb_ptr->page_size */
+ size_t size; /* entry size in bytes; exceeds the page size only for multi-page metadata entries */
+ void *image_ptr; /* when not NULL, buffer of size bytes holding the on disk image */
+ H5FD_mem_t mem_type; /* memory type used when reading / writing the entry */
+ hbool_t is_metadata; /* TRUE iff the entry contains metadata */
+ hbool_t is_dirty; /* TRUE iff modified since last written to disk */
+ /* fields supporting the hash table: */
+ struct H5PB_entry_t *ht_next; /* next entry in the hash bin, or NULL */
+ struct H5PB_entry_t *ht_prev; /* previous entry in the hash bin, or NULL */
-/******************************/
-/* Package Private Prototypes */
-/******************************/
+ /* fields supporting replacement policies: */
+ struct H5PB_entry_t *next; /* next entry on the LRU or the delayed write list, or NULL */
+ struct H5PB_entry_t *prev; /* previous entry on the LRU or the delayed write list, or NULL */
+ /* fields supporting VFD SWMR */
+ hbool_t is_mpmde; /* TRUE iff the entry is a multi-page metadata entry */
+ hbool_t loaded; /* TRUE iff the entry was loaded from file */
+ hbool_t modified_this_tick; /* TRUE iff modified in the current tick (entry is then on the tick list) */
+ uint64_t delay_write_until; /* first tick in which the entry may be written, or 0 if unconstrained */
+ struct H5PB_entry_t *tl_next; /* next entry on the tick list, or NULL */
+ struct H5PB_entry_t *tl_prev; /* previous entry on the tick list, or NULL */
+
+}; /* H5PB_entry_t */
#endif /* _H5PBpkg_H */
diff --git a/src/H5PBprivate.h b/src/H5PBprivate.h
index b94b845..2c1f3cb 100644
--- a/src/H5PBprivate.h
+++ b/src/H5PBprivate.h
@@ -11,68 +11,530 @@
* help@hdfgroup.org. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-/*-------------------------------------------------------------------------
+/*
+ * File: H5PBprivate.h
*
- * Created: H5PBprivate.h
- * June 2014
- * Mohamad Chaarawi
+ * Purpose: This file contains declarations which are normally visible
+ * within the HDF5 library, but are not visible at the user
+ * level
*
- *-------------------------------------------------------------------------
+ * Programmer: John Mainzer -- 10/07/18
*/
#ifndef _H5PBprivate_H
#define _H5PBprivate_H
/* Include package's public header */
-#ifdef NOT_YET
-#include "H5PBpublic.h"
-#endif /* NOT_YET */
+
+/* no H5PBpublic.h at present */
+
/* Private headers needed by this header */
#include "H5private.h" /* Generic Functions */
-#include "H5Fprivate.h" /* File access */
-#include "H5FLprivate.h" /* Free Lists */
-#include "H5SLprivate.h" /* Skip List */
/**************************/
/* Library Private Macros */
/**************************/
+#define H5PB__HASH_TABLE_LEN 4096 /* must be a power of 2, as the hash function simply clips the high order bits off the page offset */
+
/****************************/
/* Library Private Typedefs */
/****************************/
-/* Forward declaration for a page buffer entry */
-struct H5PB_entry_t;
+/* Typedef for the page buffer entry structure. The structure itself is
+ * defined in H5PBpkg.h; this header only needs the type name for the
+ * H5PB_entry_t pointers stored in H5PB_t. */
+typedef struct H5PB_entry_t H5PB_entry_t;
+
+
+
+/******************************************************************************
+ *
+ * structure H5PB_t
+ *
+ * Catchall structure for all variables specific to an instance of the page
+ * buffer.
+ *
+ * At present, the page buffer serves two purposes in the HDF5 library.
+ *
+ * Under normal operating conditions, it serves as a normal page buffer whose
+ * purpose is to minimize and optimize file I/O by aggregating small metadata
+ * and raw data writes into pages, and by caching frequently used pages.
+ *
+ * In addition, when a file is opened for VFD SWMR writing, the page buffer is
+ * used to retain copies of all metadata pages and multi-page metadata entries
+ * that are written in a given tick, and under certain cases, to delay metadata
+ * page and/or multi-page metadata entry writes for some number of ticks.
+ * If the entry has not appeared in the VFD SWMR index for at least max_lag
+ * ticks, this is necessary to avoid message from the future bugs. See the
+ * VFD SWMR RFC for further details.
+ *
+ * To reflect this, the fields of this structure are divided into three
+ * sections. Specifically fields needed for general operations, fields needed
+ * for VFD SWMR, and statistics.
+ *
+ * FIELDS FOR GENERAL OPERATIONS:
+ *
+ * magic: Unsigned 32 bit integer that must always be set to
+ * H5PB__H5PB_T_MAGIC. This field is used to validate pointers to
+ * instances of H5PB_t.
+ *
+ * page_size: size_t containing the page buffer page size in bytes.
+ *
+ * max_pages: 64 bit integer containing the nominal maximum number
+ * of pages in the page buffer. Note that on creation, the page
+ * buffer is empty, and that under certain circumstances (mostly
+ * related to VFD SWMR) this limit can be exceeded by large
+ * amounts.
+ *
+ * curr_pages: 64 bit integer containing the current number of pages
+ * in the page buffer. curr_pages must always equal the sum of
+ * curr_md_pages + curr_rd_pages.
+ *
+ * Note that in the context of VFD SWMR, this count does NOT
+ * include multi-page metadata entries.
+ *
+ * curr_md_pages: 64 bit integer containing the current number of
+ * metadata pages in the page buffer.
+ *
+ * Note that in the context of VFD SWMR, this count does NOT
+ * include multi-page metadata entries.
+ *
+ * curr_rd_pages: 64 bit integer containing the current number of
+ * raw data pages in the page buffer.
+ *
+ * min_md_pages: 64 bit integer containing the number of pages in the
+ * page buffer reserved for metadata. No metadata page may be
+ * evicted from the page buffer if curr_md_pages is less than or
+ * equal to this value.
+ *
+ * min_rd_pages: 64 bit integer containing the number of pages in the
+ * page buffer reserved for raw data. No page of raw data may be
+ * evicted from the page buffer if curr_rd_pages is less than or
+ * equal to this value.
+ *
+ * The FAPL fields are used to store the page buffer configuration data
+ * provided to the page buffer in the H5PB_create() call.
+ *
+ * max_size: Maximum page buffer size supplied by the FAPL.
+ *
+ * min_meta_perc: Percent of the page buffer reserved for metadata as
+ * supplied in the FAPL.
+ *
+ * min_raw_perc: Percent of the page buffer reserved for raw data as
+ * supplied in the FAPL.
+ *
+ * The purpose of the index is to allow us to efficiently look up all pages
+ * (and multi-page metadata entries in the context of VFD SWMR) in the
+ * page buffer.
+ *
+ * This function is provided by a hash table with chaining, albeit with one
+ * unusual feature.
+ *
+ * Specifically hash table size must be a power of two, and the hash function
+ * simply clips the high order bits off the page offset of the entry.
+ *
+ * This should work, as space is typically allocated sequentially, and thus
+ * via a reverse principle of locality argument, hot pages are unlikely to
+ * hash to the same bucket. That said, we must collect statistics to alert
+ * us should this not be the case.
+ *
+ * ht: Array of pointers to H5PB_entry_t of length
+ * H5PB__HASH_TABLE_LEN. This length must be a power of 2,
+ * not the usual prime number.
+ *
+ * index_len: Number of entries currently in the hash table used to index
+ * the page buffer.
+ *
+ * index_size: Number of bytes currently stored in the hash table used to
+ * index the page buffer. Under normal circumstances, this
+ * value will be index_len * page size. However, if
+ * vfd_swmr_writer is TRUE, it may be larger.
+ *
+ * Fields supporting the modified LRU policy:
+ *
+ * See most any OS text for a discussion of the LRU replacement policy.
+ *
+ * Discussions of the individual fields used by the modified LRU replacement
+ * policy follow:
+ *
+ * LRU_len: Number of page buffer entries currently on the LRU.
+ *
+ * Observe that LRU_len + dwl_len must always equal
+ * index_len.
+ *
+ * LRU_size: Number of bytes of page buffer entries currently residing
+ * on the LRU list.
+ *
+ * Observe that LRU_size + dwl_size must always equal
+ * index_size.
+ *
+ * LRU_head_ptr: Pointer to the head of the doubly linked LRU list. Page
+ * buffer entries on this list are linked by their next and
+ * prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ * LRU_tail_ptr: Pointer to the tail of the doubly linked LRU list. Page
+ * buffer entries on this list are linked by their next and
+ * prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ *
+ * FIELDS FOR VFD SWMR:
+ *
+ * vfd_swmr_writer: Boolean flag that is set to TRUE iff the file is
+ * opened in VFD SWMR mode. The remaining
+ * VFD SWMR fields are defined iff vfd_swmr_writer is TRUE.
+ *
+ * mpmde_count: int64_t containing the number of multi-page metadata
+ * entries currently resident in the page buffer. Observe
+ * that index_len should always equal curr_pages + mpmde_count.
+ *
+ * cur_tick: uint64_t containing the current tick. This is a copy of
+ * the same field in the associated instance of H5F_file_t,
+ * and is maintained as a convenience.
+ *
+ * In the context of VFD SWMR the delayed write list allows us to delay
+ * metadata writes to the HDF5 file until it appears in all indexes in the
+ * last max_lag ticks. This is essential if a version of the page or
+ * multi-page metadata entry already exists in the HDF5 file -- failure to
+ * delay the write can result in a message from the future which will
+ * likely be perceived as file corruption by the reader.
+ *
+ * To facilitate identification of entries that must be removed from the
+ * DWL, the list always observes the following invariant for any entry
+ * on the list:
+ *
+ * entry_ptr->next == NULL ||
+ * entry_ptr->delay_write_until >= entry_ptr->next->delay_write_until
+ *
+ * Discussion of the fields used to implement the delayed write list follows:
+ *
+ * max_delay: Maximum of the delay_write_until fields of the entries on
+ * the delayed write list. This must never be more than max_lag
+ * ticks in advance of the current tick, and should be set to
+ * zero if the delayed write list is empty.
+ *
+ * dwl_len: Number of page buffer entries currently on the delayed
+ * write list.
+ *
+ * Observe that LRU_len + dwl_len must always equal
+ * index_len.
+ *
+ * dwl_size: Number of bytes of page buffer entries currently residing
+ * on the delayed write list.
+ *
+ * Observe that LRU_size + dwl_size must always equal
+ * index_size.
+ *
+ * dwl_head_ptr: Pointer to the head of the doubly linked delayed write list.
+ * Page buffer entries on this list are linked by their next and
+ * prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ * dwl_tail_ptr: Pointer to the tail of the doubly linked delayed write list.
+ * Page buffer entries on this list are linked by their next and
+ * prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ * For VFD SWMR to function, copies of all pages modified during a tick must
+ * be retained in the page buffer to allow correct updates to the index and
+ * metadata file at the end of tick.
+ *
+ * To implement this, all entries modified during the current tick are placed
+ * on the tick list. Entries are removed from the tick list during end of
+ * tick processing, so each tick starts with an empty tick list.
+ *
+ * Unless the entry also resides on the delayed write list, entries on the
+ * tick list may be flushed, but they may not be evicted.
+ *
+ * Discussion of the fields used to implement the tick list follows:
+ *
+ * tl_len: Number of page buffer entries currently on the tick list
+ *
+ * tl_size: Number of bytes of page buffer entries currently residing
+ * on the tick list.
+ *
+ * tl_head_ptr: Pointer to the head of the doubly linked tick list.
+ * Page buffer entries on this list are linked by their tl_next
+ * and tl_prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ * tl_tail_ptr: Pointer to the tail of the doubly linked tick list.
+ * Page buffer entries on this list are linked by their tl_next
+ * and tl_prev fields.
+ *
+ * This field is NULL if the list is empty.
+ *
+ *
+ * STATISTICS:
+ *
+ * Multi-page metadata entries (which may only appear in VFD
+ * SWMR mode) are NOT counted in the following statistics.
+ *
+ * Note that all statistics fields contain only data since the last time
+ * that statistics were reset.
+ *
+ * bypasses: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of times that the page buffer has been
+ * bypassed for raw data, metadata, and for multi-page
+ * metadata entries (VFD SWMR only) as indexed by H5PB__STATS_RD,
+ * H5PB__STATS_MD, and H5PB__STATS_MPMDE respectively.
+ *
+ * accesses: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer accesses for raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * hits: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer hits for raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * misses: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer misses for raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * loads: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer loads for raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * insertions: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer insertions of raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * flushes: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer flushes of raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * evictions: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer evictions of raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * clears: Array of int64_t of length H5PB__NUM_STAT_TYPES containing
+ * the number of page buffer entry clears of raw data, metadata,
+ * and for multi-page metadata entries (VFD SWMR only) as
+ * indexed by H5PB__STATS_RD, H5PB__STATS_MD, and
+ * H5PB__STATS_MPMDE respectively.
+ *
+ * max_lru_len: int64_t containing the maximum number of entries that
+ * have appeared in the LRU.
+ *
+ * max_lru_size: int64_t containing the maximum size of the LRU.
+ *
+ * lru_md_skips: When searching for an entry to evict, metadata entries on
+ * the LRU must be skipped if the number of metadata pages
+ * in the page buffer fails to exceed min_md_pages.
+ *
+ * This int64_t is used to keep a count of these skips.
+ *
+ * If this number becomes excessive, it will be necessary to
+ * add a holding tank for such entries.
+ *
+ * lru_rd_skips: When searching for an entry to evict, raw data entries on
+ * the LRU must be skipped if the number of raw data pages
+ * in the page buffer fails to exceed min_rd_pages.
+ *
+ * This int64_t is used to keep a count of these skips.
+ *
+ * If this number becomes excessive, it will be necessary to
+ * add a holding tank for such entries.
+ *
+ * Multi-page metadata entries (which appear only in VFD SWMR mode) are
+ * listed in the hash table, and thus they are counted in the following
+ * statistics.
+ *
+ * total_ht_insertions: Number of times entries have been inserted into the
+ * hash table.
+ *
+ * total_ht_deletions: Number of times entries have been deleted from the
+ * hash table.
+ *
+ * successful_ht_searches: int64 containing the total number of successful
+ * searches of the hash table.
+ *
+ * total_successful_ht_search_depth: int64 containing the total number of
+ * entries other than the targets examined in successful
+ * searches of the hash table.
+ *
+ * failed_ht_searches: int64 containing the total number of unsuccessful
+ * searches of the hash table.
+ *
+ * total_failed_ht_search_depth: int64 containing the total number of
+ * entries examined in unsuccessful searches of the hash
+ * table.
+ *
+ * max_index_len: Largest value attained by the index_len field.
+ *
+ * max_index_size: Largest value attained by the index_size field.
+ *
+ * max_rd_pages: Maximum number of raw data pages in the page buffer.
+ *
+ * max_md_pages: Maximum number of metadata pages in the page buffer.
+ *
+ *
+ * Statistics pertaining to VFD SWMR.
+ *
+ * max_mpmde_count: Maximum number of multi-page metadata entries in the
+ * page buffer.
+ *
+ * lru_tl_skips: When searching for an entry to evict, metadata entries on
+ * the LRU must be skipped if they also reside on the tick list.
+ *
+ * This int64_t is used to keep a count of these skips.
+ *
+ * If this number becomes excessive, it will be necessary to
+ * add a holding tank for such entries.
+ *
+ * lru_dwl_skips: When searching for an entry to evict, metadata entries on
+ * the LRU must be skipped if they also reside on the tick list.
+ *
+ * This int64_t is used to keep a count of these skips.
+ *
+ * If this number becomes excessive, it will be necessary to
+ * add a holding tank for such entries.
+ *
+ * max_tl_len: int64_t containing the maximum value of tl_len.
+ *
+ * max_tl_size: int64_t containing the maximum value of tl_size.
+ *
+ * delayed_writes: int64_t containing the total number of delayed writes.
+ *
+ * total_delay: int64_t containing the total number of ticks by which
+ * entry writes have been delayed.
+ *
+ * max_dwl_len: int64_t containing the maximum value of dwl_len.
+ *
+ * max_dwl_size: int64_t containing the maximum value of dwl_size.
+ *
+ * total_dwl_ins_depth: int64_t containing the total insertion depth
+ * required to maintain the ordering invariant on the
+ * delayed write list.
+ *
+ ******************************************************************************/
+
+#define H5PB__H5PB_T_MAGIC 0x01020304 /* required value of H5PB_t.magic; used to validate H5PB_t pointers */
+/* indices into the per-type statistics arrays in H5PB_t: */
+#define H5PB__STATS_MD 0 /* metadata pages */
+#define H5PB__STATS_RD 1 /* raw data pages */
+#define H5PB__STATS_MPMDE 2 /* multi-page metadata entries (VFD SWMR only) */
+#define H5PB__NUM_STAT_TYPES 3 /* length of the per-type statistics arrays */
-/* Typedef for the main structure for the page buffer */
typedef struct H5PB_t {
- size_t max_size; /* The total page buffer size */
- size_t page_size; /* Size of a single page */
- unsigned min_meta_perc; /* Minimum ratio of metadata entries required before evicting meta entries */
- unsigned min_raw_perc; /* Minimum ratio of raw data entries required before evicting raw entries */
- unsigned meta_count; /* Number of entries for metadata */
- unsigned raw_count; /* Number of entries for raw data */
- unsigned min_meta_count; /* Minimum # of entries for metadata */
- unsigned min_raw_count; /* Minimum # of entries for raw data */
-
- H5SL_t *slist_ptr; /* Skip list with all the active page entries */
- H5SL_t *mf_slist_ptr; /* Skip list containing newly allocated page entries inserted from the MF layer */
-
- size_t LRU_list_len; /* Number of entries in the LRU (identical to slist_ptr count) */
- struct H5PB_entry_t *LRU_head_ptr; /* Head pointer of the LRU */
- struct H5PB_entry_t *LRU_tail_ptr; /* Tail pointer of the LRU */
-
- H5FL_fac_head_t *page_fac; /* Factory for allocating pages */
-
- /* Statistics */
- unsigned accesses[2];
- unsigned hits[2];
- unsigned misses[2];
- unsigned evictions[2];
- unsigned bypasses[2];
+
+ /* Fields for general operations: */
+
+ uint32_t magic; /* must always be H5PB__H5PB_T_MAGIC */
+ size_t page_size; /* page size in bytes */
+ int64_t max_pages; /* nominal maximum number of pages; may be exceeded */
+ int64_t curr_pages; /* always curr_md_pages + curr_rd_pages */
+ int64_t curr_md_pages; /* current number of metadata pages */
+ int64_t curr_rd_pages; /* current number of raw data pages */
+ int64_t min_md_pages; /* number of pages reserved for metadata */
+ int64_t min_rd_pages; /* number of pages reserved for raw data */
+
+ /* FAPL fields */
+ size_t max_size; /* maximum page buffer size supplied by the FAPL */
+ unsigned min_meta_perc; /* percent of the page buffer reserved for metadata */
+ unsigned min_raw_perc; /* percent of the page buffer reserved for raw data */
+
+ /* index */
+ H5PB_entry_t *(ht[H5PB__HASH_TABLE_LEN]); /* hash table with chaining */
+ int64_t index_len; /* number of entries in the hash table */
+ int64_t index_size; /* number of bytes stored in the hash table */
+
+ /* LRU */
+ int64_t LRU_len; /* number of entries on the LRU */
+ int64_t LRU_size; /* number of bytes of entries on the LRU */
+ H5PB_entry_t * LRU_head_ptr; /* head of the LRU list, NULL if empty */
+ H5PB_entry_t * LRU_tail_ptr; /* tail of the LRU list, NULL if empty */
+
+
+ /* Fields for VFD SWMR operations: */
+
+ hbool_t vfd_swmr_writer; /* remaining VFD SWMR fields are defined iff TRUE */
+ int64_t mpmde_count; /* number of resident multi-page metadata entries */
+ uint64_t cur_tick; /* copy of the current tick */
+
+ /* delayed write list */
+ uint64_t max_delay; /* max delay_write_until on the list, 0 if the list is empty */
+ int64_t dwl_len; /* number of entries on the delayed write list */
+ int64_t dwl_size; /* number of bytes of entries on the delayed write list */
+ H5PB_entry_t * dwl_head_ptr; /* head of the delayed write list, NULL if empty */
+ H5PB_entry_t * dwl_tail_ptr; /* tail of the delayed write list, NULL if empty */
+
+ /* tick list */
+ int64_t tl_len; /* number of entries on the tick list */
+ int64_t tl_size; /* number of bytes of entries on the tick list */
+ H5PB_entry_t * tl_head_ptr; /* head of the tick list, NULL if empty */
+ H5PB_entry_t * tl_tail_ptr; /* tail of the tick list, NULL if empty */
+
+ /* Statistics: */
+
+ /* general operations statistics: */
+ /* these statistics count pages only, not multi-page metadata entries
+ * (that occur only in the VFD SWMR writer case).
+ */
+ int64_t bypasses[H5PB__NUM_STAT_TYPES];
+ int64_t accesses[H5PB__NUM_STAT_TYPES];
+ int64_t hits[H5PB__NUM_STAT_TYPES];
+ int64_t misses[H5PB__NUM_STAT_TYPES];
+ int64_t loads[H5PB__NUM_STAT_TYPES];
+ int64_t insertions[H5PB__NUM_STAT_TYPES];
+ int64_t flushes[H5PB__NUM_STAT_TYPES];
+ int64_t evictions[H5PB__NUM_STAT_TYPES];
+ int64_t clears[H5PB__NUM_STAT_TYPES];
+ int64_t max_lru_len; /* maximum number of entries seen on the LRU */
+ int64_t max_lru_size; /* maximum size of the LRU */
+ int64_t lru_md_skips; /* metadata entries skipped during eviction searches */
+ int64_t lru_rd_skips; /* raw data entries skipped during eviction searches */
+
+ /* In the VFD SWMR case, both pages and multi-page metadata entries
+ * are stored in the index. Thus multi-page metadata entries are
+ * included in the index related statistics.
+ */
+ int64_t total_ht_insertions;
+ int64_t total_ht_deletions;
+ int64_t successful_ht_searches;
+ int64_t total_successful_ht_search_depth;
+ int64_t failed_ht_searches;
+ int64_t total_failed_ht_search_depth;
+ int64_t max_index_len; /* largest value attained by index_len */
+ int64_t max_index_size; /* largest value attained by index_size */
+ int64_t max_rd_pages; /* maximum number of raw data pages */
+ int64_t max_md_pages; /* maximum number of metadata pages */
+
+
+ /* vfd swmr statistics */
+ int64_t max_mpmde_count; /* maximum number of multi-page metadata entries */
+ int64_t lru_tl_skips;
+ int64_t lru_dwl_skips;
+ int64_t max_tl_len; /* maximum value of tl_len */
+ int64_t max_tl_size; /* maximum value of tl_size */
+ int64_t delayed_writes; /* total number of delayed writes */
+ int64_t total_delay; /* total number of ticks by which writes were delayed */
+ int64_t max_dwl_len; /* maximum value of dwl_len */
+ int64_t max_dwl_size; /* maximum value of dwl_size */
+ int64_t total_dwl_ins_depth; /* total insertion depth on the delayed write list */
+
} H5PB_t;
/*****************************/
@@ -85,20 +547,38 @@ typedef struct H5PB_t {
/***************************************/
/* General routines */
-H5_DLL herr_t H5PB_create(H5F_t *file, size_t page_buffer_size, unsigned page_buf_min_meta_perc, unsigned page_buf_min_raw_perc);
+H5_DLL herr_t H5PB_create(H5F_t *file, size_t page_buffer_size,
+ unsigned page_buf_min_meta_perc, unsigned page_buf_min_raw_perc);
+
H5_DLL herr_t H5PB_flush(H5F_t *f);
+
H5_DLL herr_t H5PB_dest(H5F_t *f);
+
H5_DLL herr_t H5PB_add_new_page(H5F_t *f, H5FD_mem_t type, haddr_t page_addr);
-H5_DLL herr_t H5PB_update_entry(H5PB_t *page_buf, haddr_t addr, size_t size, const void *buf);
+
+H5_DLL herr_t H5PB_update_entry(H5PB_t *page_buf, haddr_t addr, size_t size,
+ const void *buf);
+
H5_DLL herr_t H5PB_remove_entry(const H5F_t *f, haddr_t addr);
-H5_DLL herr_t H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, void *buf/*out*/);
-H5_DLL herr_t H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, const void *buf);
+
+H5_DLL herr_t H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ void *buf/*out*/);
+
+H5_DLL herr_t H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
+ const void *buf);
/* Statistics routines */
H5_DLL herr_t H5PB_reset_stats(H5PB_t *page_buf);
+/* NOTE(review): H5PB_get_stats() still takes unsigned arrays of length 2,
+ * while the corresponding statistics in H5PB_t are now int64_t arrays of
+ * length H5PB__NUM_STAT_TYPES (3) -- confirm how the values are narrowed
+ * and whether the multi-page metadata entry statistics should be reported.
+ */
H5_DLL herr_t H5PB_get_stats(const H5PB_t *page_buf, unsigned accesses[2],
- unsigned hits[2], unsigned misses[2], unsigned evictions[2], unsigned bypasses[2]);
+ unsigned hits[2], unsigned misses[2], unsigned evictions[2],
+ unsigned bypasses[2]);
+
H5_DLL herr_t H5PB_print_stats(const H5PB_t *page_buf);
+/* test & debug functions */
+H5_DLL herr_t H5PB_page_exists(H5F_t *f, haddr_t addr,
+ hbool_t *page_exists_ptr);
+
#endif /* !_H5PBprivate_H */