summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authormainzer <mainzer#hdfgroup.org>2018-11-19 08:05:37 (GMT)
committermainzer <mainzer#hdfgroup.org>2018-11-19 08:05:37 (GMT)
commit2f2cf050e68c02c397e032e1b43d05ed8bafafbf (patch)
treeb9307471b225e9d29b8b02829a2782f955ac66db /src
parente62f4bd4fab00b0cd03e269a858c21558a9321fa (diff)
downloadhdf5-2f2cf050e68c02c397e032e1b43d05ed8bafafbf.zip
hdf5-2f2cf050e68c02c397e032e1b43d05ed8bafafbf.tar.gz
hdf5-2f2cf050e68c02c397e032e1b43d05ed8bafafbf.tar.bz2
local commit of first cut at vfd swmr writer EOT code.
This is necessary to allow access to Vailin's recent changes. This version passes non-swmr tests in a serial / debug build on charis.
Diffstat (limited to 'src')
-rw-r--r--src/H5FDprivate.h102
-rw-r--r--src/H5Fint.c789
-rw-r--r--src/H5Fpkg.h62
-rw-r--r--src/H5Fprivate.h2
-rw-r--r--src/H5Fpublic.h4
-rw-r--r--src/H5PB.c721
-rw-r--r--src/H5PBpkg.h805
-rw-r--r--src/H5PBprivate.h141
8 files changed, 2143 insertions, 483 deletions
diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h
index 45ad4c2..b9582b3 100644
--- a/src/H5FDprivate.h
+++ b/src/H5FDprivate.h
@@ -87,55 +87,77 @@
/* Internal representation of metadata file index entry */
-/*
- * hdf5_page_offset: Unsigned 64-bit value containing the base address of the
- * metadata page, or multi page metadata entry in the HDF5
- * file IN PAGES.
- * To obtain byte offset, multiply this value by the page size.
+
+/*----------------------------------------------------------------------------
+ *
+ * struct H5FD_vfd_swmr_idx_entry_t
+ *
+ * Indices into the VFD SWMR metadata file are maintained in arrays of
+ * instances of H5FD_vfd_swmr_idx_entry_t.
+ *
+ * The fields of H5FD_vfd_swmr_idx_entry_t are discussed below.
+ *
+ * hdf5_page_offset: Unsigned 64-bit value containing the base address of the
+ * metadata page, or multi page metadata entry in the HDF5
+ * file IN PAGES.
*
- * md_file_page_offset: Unsigned 64-bit value containing the base address of the
- * metadata page, or multi page metadata entry in the metadata
- * file IN PAGES.
- * To obtain byte offset, multiply this value by the page size.
+ * To obtain byte offset, multiply this value by the page size.
*
- * length: The length of the metadata page or multi- page metadata entry
- * in BYTES.
+ * md_file_page_offset: Unsigned 64-bit value containing the base address of
+ * the metadata page, or multi page metadata entry in the metadata
+ * file IN PAGES.
*
- * chksum: Checksum for the metadata page or multi-page metadata entry.
- * For the VFD SWMR writer, this value is undefined until the
- * referenced entry has been written to the metadata file.
+ * To obtain byte offset, multiply this value by the page size.
+ *
+ * length: The length of the metadata page or multi- page metadata entry
+ * in BYTES.
+ *
+ * chksum: Checksum for the metadata page or multi-page metadata entry.
+ * For the VFD SWMR writer, this value is undefined until the
+ * referenced entry has been written to the metadata file.
*
- * entry_ptr: Used by the VFD SWMR writer only.
- * For the VFD SWMR reader, this field should always be NULL.
- * If the referenced metadata page or multi-page metadata
- * entry was modified in the current tick, this field points to
- * a buffer in the page buffer containing its value.
- * This field is used by the metadata file creation/update code
- * to access the metadata pages or multi-page metadata entries
- * so that their current values can be copied into the metadata
- * file. After this copy, this field should be set to NULL.
+ * entry_ptr: Used by the VFD SWMR writer only.
+ *
+ * For the VFD SWMR reader, this field should always be NULL.
+ * If the referenced metadata page or multi-page metadata
+ * entry was modified in the current tick, this field points to
+ * a buffer in the page buffer containing its value.
+ * This field is used by the metadata file creation/update code
+ * to access the metadata pages or multi-page metadata entries
+ * so that their current values can be copied into the metadata
+ * file. After this copy, this field should be set to NULL.
*
- * tick_of_last_change: Number of the last tick in which this index entry was changed.
- * Used by the VFD SWMR writer only.
- * For the VFD SWMR reader, this field will always be set to 0.
+ * tick_of_last_change: Number of the last tick in which this index entry
+ * was changed.
*
- * clean: Used by the VFD SWMR writer only.
- * Set to TRUE whenever the referenced metadata page or multi-page
- * metadata entry is written to the HDF5 file.
- * Set to FALSE whenever it is marked dirty in the page buffer.
+ * Used by the VFD SWMR writer only.
*
- * tick_of_last_flush: Number of the tick in which this entry was last written to the
- * HDF5 file or zero if it has never been flusehd.
- * Used by the VFD SWMR writer only.
- * For the VFD SWMR reader, this field should always be 0.
+ * For the VFD SWMR reader, this field will always be set to 0.
+ *
+ * clean: Used by the VFD SWMR writer only.
+ *
+ * Set to TRUE whenever the referenced metadata page or
+ * multi-page metadata entry is written to the HDF5 file.
+ * Set to FALSE whenever it is marked dirty in the page buffer.
+ *
+ * tick_of_last_flush: Number of the tick in which this entry was last
+ * written to the HDF5 file or zero if it has never been flushed.
+ *
+ * Used by the VFD SWMR writer only.
+ *
+ * For the VFD SWMR reader, this field should always be 0.
*
- * delayed_flush: If the flush of the referenced metadata page or multi-page
- * metadata entry must be delayed, the earliest tick in which
- * it may be flushed, or zero if there is no such constraint.
- * Used by the VFD SWMR writer only.
+ * delayed_flush: If the flush of the referenced metadata page or multi-page
+ * metadata entry must be delayed, the earliest tick in which
+ * it may be flushed, or zero if there is no such constraint.
+ *
+ * Used by the VFD SWMR writer only.
+ *
+ * is_moved_to_hdf5_file: Set to TRUE iff the entry referenced is in the
+ * HDF5 file and is therefore about to be removed from the
+ * metadata file
*
- * is_moved_to_hdf5_file: Set to TRUE iff the entry referenced is in the HDF5 file and
- * is therefore about to be removed from the metadata file
+ *----------------------------------------------------------------------------
*/
typedef struct H5FD_vfd_swmr_idx_entry_t {
uint64_t hdf5_page_offset;
diff --git a/src/H5Fint.c b/src/H5Fint.c
index bca09b2..2df2d7e 100644
--- a/src/H5Fint.c
+++ b/src/H5Fint.c
@@ -125,6 +125,8 @@ static herr_t H5F__vfd_swmr_update_end_of_tick_and_tick_num(H5F_t *f, hbool_t in
static herr_t H5F__vfd_swmr_construct_write_md_hdr(H5F_t *f, uint32_t num_entries);
static herr_t H5F__vfd_swmr_construct_write_md_idx(H5F_t *f, uint32_t num_entries, struct H5FD_vfd_swmr_idx_entry_t index[]);
static herr_t H5F__idx_entry_cmp(const void *_entry1, const void *_entry2);
+static herr_t H5F__vfd_swmr_writer__create_index(H5F_t * f);
+
/*********************/
@@ -3588,25 +3590,39 @@ done:
/*-------------------------------------------------------------------------
+ *
* Function: H5F__vfd_swmr_init
*
- * Purpose: Initialize globals and the corresponding fields in file pointer.
- * For both:
- * --set vfd_swmr_g to TRUE
- * --set vfd_swmr_file_g to f
- * --set end_of_tick to the current time + tick length
+ * Purpose: Initialize globals and the corresponding fields in
+ * file pointer.
+ *
+ * For both VFD SWMR writer and reader:
+ *
+ * --set vfd_swmr_g to TRUE
+ * --set vfd_swmr_file_g to f
+ * --set end_of_tick to the current time + tick length
+ *
* For VFD SWMR writer:
- * --set vfd_swmr_writer_g to TRUE
- * --set tick_num_g to 0
- * --create the metadata file
- * --when opening an existing HDF5 file, write header and empty index in the metadata file
+ *
+ * --set vfd_swmr_writer_g to TRUE
+ * --set tick_num_g to 0
+ * --create the metadata file
+ * --when opening an existing HDF5 file, write header and
+ * empty index in the metadata file
+ *
* For VFD SWMR reader:
- * --set vfd_swmr_writer_g to FALSE
- * --set tick_num_g to the current tick read from the metadata file
+ *
+ * --set vfd_swmr_writer_g to FALSE
+ * --set tick_num_g to the current tick read from the
+ * metadata file
*
* Return: Success: SUCCEED
* Failure: FAIL
*
+ * Programmer: Vailin Choi -- 11/??/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -3623,63 +3639,96 @@ H5F__vfd_swmr_init(H5F_t *f, hbool_t file_create)
vfd_swmr_file_g = f;
if(H5F_INTENT(f) & H5F_ACC_RDWR) {
+
HDassert(f->shared->vfd_swmr_config.vfd_swmr_writer);
vfd_swmr_writer_g = f->shared->vfd_swmr_writer = TRUE;
tick_num_g = f->shared->tick_num = 0;
/* Create the metadata file */
- if(((f->shared->vfd_swmr_md_fd = HDopen(f->shared->vfd_swmr_config.md_file_path, O_CREAT|O_RDWR, H5_POSIX_CREATE_MODE_RW))) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "unable to create the metadata file")
+ if ( ((f->shared->vfd_swmr_md_fd =
+ HDopen(f->shared->vfd_swmr_config.md_file_path, O_CREAT|O_RDWR,
+ H5_POSIX_CREATE_MODE_RW))) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, \
+ "unable to create the metadata file")
- md_size = (hsize_t)f->shared->vfd_swmr_config.md_pages_reserved * f->shared->fs_page_size;
+ md_size = (hsize_t)f->shared->vfd_swmr_config.md_pages_reserved *
+ f->shared->fs_page_size;
/* Set the metadata file size to md_pages_reserved */
- if(-1 == HDftruncate(f->shared->vfd_swmr_md_fd, (HDoff_t)md_size))
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "truncate fail for the metadata file")
+ if ( -1 == HDftruncate(f->shared->vfd_swmr_md_fd, (HDoff_t)md_size) )
+
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "truncate fail for the metadata file")
/* Set eof for metadata file to md_pages_reserved */
f->shared->vfd_swmr_md_eoa = (haddr_t)md_size;
- /* When opening an existing HDF5 file, create header and empty index in the metadata file */
- if(!file_create) {
- if(H5F__vfd_swmr_construct_write_md_hdr(f, 0) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to create header in md")
- if(H5F__vfd_swmr_construct_write_md_idx(f, 0, NULL) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to create index in md")
+ /* When opening an existing HDF5 file, create header and empty
+ * index in the metadata file
+ */
+ if ( !file_create ) {
+
+ if ( H5F__vfd_swmr_construct_write_md_hdr(f, 0) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+ "fail to create header in md")
+
+ if ( H5F__vfd_swmr_construct_write_md_idx(f, 0, NULL) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+ "fail to create index in md")
}
} else { /* VFD SWMR reader */
+
HDassert(!f->shared->vfd_swmr_config.vfd_swmr_writer);
+
vfd_swmr_writer_g = f->shared->vfd_swmr_writer = FALSE;
/* Set tick_num_g to the current tick read from the metadata file */
- if(H5FD_vfd_swmr_get_tick_and_idx(f->shared->lf, FALSE, &tick_num_g, NULL, NULL) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTLOAD, FAIL, "unable to load/decode metadata file")
+ if ( H5FD_vfd_swmr_get_tick_and_idx(f->shared->lf, FALSE,
+ &tick_num_g, NULL, NULL) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTLOAD, FAIL, \
+ "unable to load/decode metadata file")
+
f->shared->tick_num = tick_num_g;
}
/* Update end_of_tick */
- if(H5F__vfd_swmr_update_end_of_tick_and_tick_num(f, FALSE) < 0)
+ if ( H5F__vfd_swmr_update_end_of_tick_and_tick_num(f, FALSE) < 0 )
+
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "unable to update end of tick")
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5F__vfd_swmr_init() */
+
/*-------------------------------------------------------------------------
+ *
* Function: H5F__vfd_swmr_construct_write_md_hdr
*
* Purpose: Encode and write header to the metadata file.
+ *
* This is used by the VFD SWMR writer:
- * --when opening an existing HDF5 file
- * --when closing the HDF5 file
- * --after flushing an HDF5 file
- * --when updating the metadata file
+ *
+ * --when opening an existing HDF5 file
+ * --when closing the HDF5 file
+ * --after flushing an HDF5 file
+ * --when updating the metadata file
*
* Return: Success: SUCCEED
* Failure: FAIL
*
+ * Programmer: Vailin Choi -- 11/??/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -3718,34 +3767,49 @@ H5F__vfd_swmr_construct_write_md_hdr(H5F_t *f, uint32_t num_entries)
HDassert((size_t)(p - image == hdr_size));
/* Set to beginning of the file */
- if(HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)0, SEEK_SET) < 0)
- HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, "unable to seek in metadata file")
+ if ( HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)0, SEEK_SET) < 0 )
+
+ HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, \
+ "unable to seek in metadata file")
/* Write header to the metadata file */
- if(HDwrite(f->shared->vfd_swmr_md_fd, image, hdr_size) != hdr_size)
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "error in writing header to metadata file")
+ if ( HDwrite(f->shared->vfd_swmr_md_fd, image, hdr_size) != hdr_size )
+
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in writing header to metadata file")
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5F__vfd_swmr_construct_write_md_hdr() */
+
/*-------------------------------------------------------------------------
+ *
* Function: H5F__vfd_swmr_construct_write_md_idx
*
* Purpose: Encode and write index to the metadata file.
+ *
* This is used by the VFD SWMR writer:
- * --when opening an existing HDF5 file
- * --when closing the HDF5 file
- * --after flushing an HDF5 file
- * --when updating the metadata file
+ *
+ * --when opening an existing HDF5 file
+ * --when closing the HDF5 file
+ * --after flushing an HDF5 file
+ * --when updating the metadata file
*
* Return: Success: SUCCEED
* Failure: FAIL
*
+ * Programmer: Vailin Choi -- 11/??/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
-H5F__vfd_swmr_construct_write_md_idx(H5F_t *f, uint32_t num_entries, struct H5FD_vfd_swmr_idx_entry_t index[])
+H5F__vfd_swmr_construct_write_md_idx(H5F_t *f, uint32_t num_entries,
+ struct H5FD_vfd_swmr_idx_entry_t index[])
{
uint8_t *image = NULL; /* Pointer to buffer */
uint8_t *p = NULL; /* Pointer to buffer */
@@ -3756,11 +3820,14 @@ H5F__vfd_swmr_construct_write_md_idx(H5F_t *f, uint32_t num_entries, struct H5FD
FUNC_ENTER_STATIC
- HDassert((num_entries!= 0 && index != NULL) || (num_entries == 0 && index == NULL));
+ HDassert((num_entries!= 0 && index != NULL) ||
+ (num_entries == 0 && index == NULL));
/* Allocate space for the buffer to hold the index */
- if((image = (uint8_t *)HDmalloc(idx_size)) == NULL)
- HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for md index")
+ if ( (image = (uint8_t *)HDmalloc(idx_size)) == NULL )
+
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+ "memory allocation failed for md index")
/*
* Encode metadata file index
@@ -3798,22 +3865,33 @@ H5F__vfd_swmr_construct_write_md_idx(H5F_t *f, uint32_t num_entries, struct H5FD
HDassert(f->shared->vfd_swmr_md_fd >= 0);
/* Set to right after the header */
- if(HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)H5FD_MD_HEADER_SIZE, SEEK_SET) < 0)
- HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, "unable to seek in metadata file")
+ if ( HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)H5FD_MD_HEADER_SIZE,
+ SEEK_SET) < 0)
+
+ HGOTO_ERROR(H5E_VFL, H5E_SEEKERROR, FAIL, \
+ "unable to seek in metadata file")
/* Write index to the metadata file */
- if(HDwrite(f->shared->vfd_swmr_md_fd, image, idx_size) != idx_size)
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "error in writing index to metadata file")
+ if ( HDwrite(f->shared->vfd_swmr_md_fd, image, idx_size) != idx_size )
+
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in writing index to metadata file")
done:
- if(image)
+
+ if ( image ) {
+
HDfree(image);
+ }
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5F__vfd_swmr_construct_write_idx() */
/*-------------------------------------------------------------------------
+ *
* Function: H5F__vfd_swmr_update_end_of_tick_and_tick_num
*
* Purpose: Update end_of_tick (end_of_tick_g, f->shared->end_of_tick)
@@ -3822,6 +3900,10 @@ done:
* Return: Success: SUCCEED
* Failure: FAIL
*
+ * Programmer: Vailin Choi -- 11/??/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -3831,15 +3913,19 @@ H5F__vfd_swmr_update_end_of_tick_and_tick_num(H5F_t *f, hbool_t incr_tick_num)
struct timespec new_end_of_tick; /* new end_of_tick in struct timespec */
long curr_nsecs; /* current time in nanoseconds */
long tlen_nsecs; /* tick_len in nanoseconds */
+#if 0 /* JRM */
long end_nsecs; /* end_of_tick in nanoseconds */
+#endif /* JRM */
long new_end_nsecs; /* new end_of_tick in nanoseconds */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_STATIC
/* Get current time in struct timespec */
- if(HDclock_gettime(CLOCK_MONOTONIC, &curr) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get time via clock_gettime")
+ if ( HDclock_gettime(CLOCK_MONOTONIC, &curr) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, \
+ "can't get time via clock_gettime")
/* Convert curr to nsecs */
curr_nsecs = curr.tv_sec * SECOND_TO_NANOSECS + curr.tv_nsec;
@@ -3850,12 +3936,22 @@ H5F__vfd_swmr_update_end_of_tick_and_tick_num(H5F_t *f, hbool_t incr_tick_num)
/*
* Update tick_num_g, f->shared->tick_num
*/
- if(incr_tick_num) {
+ if ( incr_tick_num ) {
+
+#if 0 /* JRM */
/* Convert end_of_tick_g to nanoseconds */
- end_nsecs = end_of_tick_g.tv_sec * SECOND_TO_NANOSECS + end_of_tick_g.tv_nsec;
+ end_nsecs = end_of_tick_g.tv_sec * SECOND_TO_NANOSECS +
+ end_of_tick_g.tv_nsec;
/* Increment tick_num by # of elapsed ticks */
tick_num_g += (1+ (uint64_t)((curr_nsecs - end_nsecs) / tlen_nsecs));
+#else /* JRM */
+ /* Regardless of elapsed time, only increment the tick num by 1
+ * so as to avoid the possibility of using up all of max_lag in
+ * one or two ticks.
+ */
+ tick_num_g++;
+#endif /* JRM */
f->shared->tick_num = tick_num_g;
}
@@ -3863,29 +3959,43 @@ H5F__vfd_swmr_update_end_of_tick_and_tick_num(H5F_t *f, hbool_t incr_tick_num)
* Update end_of_tick_g, f->shared->end_of_tick
*/
/* Calculate new end_of_tick */
+
+ /* TODO: The modulo operation is very expensive on most machines --
+ * re-work this code so as to avoid it.
+ *
+ * JRM -- 11/12/18
+ */
+
new_end_nsecs = curr_nsecs + tlen_nsecs;
new_end_of_tick.tv_nsec = new_end_nsecs % SECOND_TO_NANOSECS;
new_end_of_tick.tv_sec = new_end_nsecs / SECOND_TO_NANOSECS;
/* Update end_of_tick */
HDmemcpy(&end_of_tick_g, &new_end_of_tick, sizeof(struct timespec));
- HDmemcpy(&f->shared->end_of_tick, &new_end_of_tick, sizeof(struct timespec));
+ HDmemcpy(&f->shared->end_of_tick, &new_end_of_tick,
+ sizeof(struct timespec));
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* H5F__vfd_swmr_update_end_of_tick_and_tick_num() */
/*-------------------------------------------------------------------------
+ *
* Function: H5F__vfd_swmr_close_or_flush
*
- * Purpose: Used by the VFD SWMR writer when the HDF5 file is closed or flushed:
+ * Purpose: Used by the VFD SWMR writer when the HDF5 file is closed
+ * or flushed:
+ *
* 1) For file close:
* --write header and an empty index to the metadata file
* --increment tick_num
* --close the metadata file
* --unlink the metadata file
* --close the free-space manager for the metadata file
+ *
* 2) For file flush:
* --write header and an empty index to the metadata file
* --increment tick_num
@@ -3895,6 +4005,10 @@ done:
* Return: Success: SUCCEED
* Failure: FAIL
*
+ * Programmer: Vailin Choi -- 11/??/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -3909,52 +4023,71 @@ H5F__vfd_swmr_close_or_flush(H5F_t *f, hbool_t closing)
HDassert(f->shared->vfd_swmr_md_fd >= 0);
/* Write empty index to the md file */
- if(H5F__vfd_swmr_construct_write_md_idx(f, 0, NULL) < 0)
+ if ( H5F__vfd_swmr_construct_write_md_idx(f, 0, NULL) < 0 )
+
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to create index in md")
+
+
/* Write header to the md file */
- if(H5F__vfd_swmr_construct_write_md_hdr(f, 0) < 0)
+ if ( H5F__vfd_swmr_construct_write_md_hdr(f, 0) < 0 )
+
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to create header in md")
/* Increment tick_num */
tick_num_g = ++f->shared->tick_num;
- if(closing) { /* For file close */
+ if ( closing ) { /* For file close */
+
/* Close the md file */
if(HDclose(f->shared->vfd_swmr_md_fd) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "unable to close the metadata file")
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, \
+ "unable to close the metadata file")
f->shared->vfd_swmr_md_fd = -1;
/* Unlink the md file */
- if(HDunlink(f->shared->vfd_swmr_config.md_file_path) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTREMOVE, FAIL, "unable to unlink the metadata file")
+ if ( HDunlink(f->shared->vfd_swmr_config.md_file_path) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTREMOVE, FAIL, \
+ "unable to unlink the metadata file")
/* Close the free-space manager for the metadata file */
- if(H5MV_close(f) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, "unable to close the free-space manager for the metadata file")
+ if ( H5MV_close(f) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTRELEASE, FAIL, \
+ "unable to close the free-space manager for the metadata file")
/* Free the delayed list */
curr = f->shared->dl_head_ptr;
- while(curr != NULL) {
+
+ while ( curr != NULL ) {
+
next = curr->next;
curr = H5FL_FREE(H5F_vfd_swmr_dl_entry_t, curr);
curr = next;
+
} /* end while */
+
f->shared->dl_head_ptr = f->shared->dl_tail_ptr = NULL;
vfd_swmr_file_g = NULL;
} else { /* For file flush */
+
/* Update end_of_tick */
- if(H5F__vfd_swmr_update_end_of_tick_and_tick_num(f, TRUE) < 0)
- HDONE_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "unable to update end of tick")
+ if ( H5F__vfd_swmr_update_end_of_tick_and_tick_num(f, TRUE) < 0 )
+
+ HDONE_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+ "unable to update end of tick")
}
done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* H5F__vfd_swmr_close_or_flush() */
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5F__vfd_swmr_close_or_flush() */
+
/*-------------------------------------------------------------------------
* Function: H5F__idx_entry_cmp()
*
@@ -3989,10 +4122,13 @@ H5F__idx_entry_cmp(const void *_entry1, const void *_entry2)
} /* H5F__idx_entry_cmp() */
/*-------------------------------------------------------------------------
+ *
* Function: H5F_update_vfd_swmr_metadata_file()
*
* Purpose: Update the metadata file with the input index
+ *
* --Sort index
+ *
* --For each non-null entry_ptr in the index entries:
* --Insert previous image of the entry onto the delayed list
* --Allocate space for the entry in the metadata file
@@ -4000,19 +4136,35 @@ H5F__idx_entry_cmp(const void *_entry1, const void *_entry2)
* --Update index entry
* --Write the entry to the metadata file
* --Set entry_ptr to NULL
- * --Construct on disk image of the index and write index to the metadata file
- * --Construct on disk image of the header and write header to the metadata file
- * --Release time out entries from the delayed list to the free-space manager
+ *
+ * --Construct on disk image of the index and write index to the
+ * metadata file
+ *
+ * --Construct on disk image of the header and write header to
+ * the metadata file
+ *
+ * --Release time out entries from the delayed list to the
+ * free-space manager
*
* Return: SUCCEED/FAIL
*
+ * Programmer: Vailin Choi 11/??/18
+ *
+ * Changes: None.
+ *
+ *
*-------------------------------------------------------------------------
*/
herr_t
-H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries, struct H5FD_vfd_swmr_idx_entry_t index[])
+H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
+ struct H5FD_vfd_swmr_idx_entry_t index[])
{
- H5F_vfd_swmr_dl_entry_t *prev; /* Points to the previous entry in the delayed list */
- H5F_vfd_swmr_dl_entry_t *dl_entry; /* Points to an entry in the delayed list */
+ H5F_vfd_swmr_dl_entry_t *prev; /* Points to the previous entry
+ * in the delayed list
+ */
+ H5F_vfd_swmr_dl_entry_t *dl_entry; /* Points to an entry in the
+ * delayed list
+ */
haddr_t md_addr; /* Address in the metadata file */
unsigned i; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
@@ -4020,110 +4172,537 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries, struct H5FD_vf
FUNC_ENTER_NOAPI(FAIL)
/* Sort index entries by increasing offset in the HDF5 file */
- if(num_entries)
- HDqsort(index, num_entries, sizeof(H5FD_vfd_swmr_idx_entry_t), H5F__idx_entry_cmp);
+ if ( num_entries ) {
+
+ HDqsort(index, num_entries, sizeof(H5FD_vfd_swmr_idx_entry_t),
+ H5F__idx_entry_cmp);
+ }
/* For each non-null entry_ptr in the index:
- * --Insert previous image of the entry (if exists) to the beginning of the delayed list
+ *
+ * --Insert previous image of the entry (if exists) to the
+ * beginning of the delayed list
+ *
* --Allocate space for the entry in the metadata file
- * --Compute checksum, update the index entry, write entry to the metadata file
+ *
+ * --Compute checksum, update the index entry, write entry to
+ * the metadata file
+ *
* --Set entry_ptr to NULL
*/
- for(i = 0; i < num_entries; i++) {
- if(index[i].entry_ptr != NULL) {
+ for ( i = 0; i < num_entries; i++ ) {
+
+ if ( index[i].entry_ptr != NULL ) {
+
/* Prepend previous image of the entry to the delayed list */
- if(index[i].md_file_page_offset) {
- if(NULL == (dl_entry = H5FL_CALLOC(H5F_vfd_swmr_dl_entry_t)))
- HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, "unable to allocate the delayed entry")
+ if ( index[i].md_file_page_offset ) {
+
+ if ( NULL == (dl_entry = H5FL_CALLOC(H5F_vfd_swmr_dl_entry_t)))
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, \
+ "unable to allocate the delayed entry")
+
dl_entry->hdf5_page_offset = index[i].hdf5_page_offset;
dl_entry->md_file_page_offset = index[i].md_file_page_offset;
dl_entry->length = index[i].length;
dl_entry->tick_num = f->shared->tick_num;
- H5F_DC_PREPEND(dl_entry, f->shared->dl_head_ptr, f->shared->dl_tail_ptr, f->shared->dl_len);
+
+ H5F_DC_PREPEND(dl_entry, f->shared->dl_head_ptr, \
+ f->shared->dl_tail_ptr, f->shared->dl_len);
}
/* Allocate space for the entry in the metadata file */
if((md_addr = H5MV_alloc(f, index[i].length)) == HADDR_UNDEF)
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "error in allocating space from the metadata file")
+
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in allocating space from the metadata file")
+
/* Compute checksum and update the index entry */
index[i].md_file_page_offset = md_addr/f->shared->fs_page_size;
- index[i].chksum = H5_checksum_metadata(index[i].entry_ptr, (size_t)(index[i].length), 0);
+ index[i].chksum = H5_checksum_metadata(index[i].entry_ptr,
+ (size_t)(index[i].length), 0);
/* Seek and write the entry to the metadata file */
- if(HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)md_addr, SEEK_SET) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, "unable to seek in the metadata file")
- if(HDwrite(f->shared->vfd_swmr_md_fd, index[i].entry_ptr, index[i].length) != index[i].length)
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, "error in writing the page/multi-page entry to metadata file")
+ if ( HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)md_addr,
+ SEEK_SET) < 0)
+
+ HGOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, \
+ "unable to seek in the metadata file")
+
+ if ( HDwrite(f->shared->vfd_swmr_md_fd, index[i].entry_ptr,
+ index[i].length) != index[i].length )
+
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in writing the page/multi-page entry to metadata file")
/* Set entry_ptr to NULL */
index[i].entry_ptr = NULL;
- } /* end if */
+ } /* end if */
} /* end for */
/* Construct and write index to the metadata file */
- if(H5F__vfd_swmr_construct_write_md_idx(f, num_entries, index) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to construct & write index to md")
+ if ( H5F__vfd_swmr_construct_write_md_idx(f, num_entries, index) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+ "fail to construct & write index to md")
/* Construct and write header to the md file */
- if(H5F__vfd_swmr_construct_write_md_hdr(f, num_entries) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "fail to construct & write header to md")
+ if ( H5F__vfd_swmr_construct_write_md_hdr(f, num_entries) < 0 )
+
+ HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+ "fail to construct & write header to md")
/*
- * Release time out entries from the delayed list by scanning the list from the bottom up:
- * --release to the metadata file free space manager all index entries that have
- * resided on the list for more than max_lag ticks
+ * Release time out entries from the delayed list by scanning the
+ * list from the bottom up:
+ *
+ * --release to the metadata file free space manager all index
+ * entries that have resided on the list for more than
+ * max_lag ticks
+ *
* --remove the associated entries from the list
*/
dl_entry = f->shared->dl_tail_ptr;
- while(dl_entry != NULL) {
+
+ while ( dl_entry != NULL ) {
prev = dl_entry->prev;
+
/* max_lag is at least 3 */
- if((int)dl_entry->tick_num <= ((int)f->shared->tick_num - f->shared->vfd_swmr_config.max_lag)) {
- if(H5MV_free(f, dl_entry->md_file_page_offset * f->shared->fs_page_size, dl_entry->length) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to flush clean entry")
+ if ( ( f->shared->tick_num > f->shared->vfd_swmr_config.max_lag ) &&
+ ( dl_entry->tick_num <=
+ f->shared->tick_num - f->shared->vfd_swmr_config.max_lag ) ) {
+
+ if ( H5MV_free(f, dl_entry->md_file_page_offset *
+ f->shared->fs_page_size, dl_entry->length) < 0 )
+
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+ "unable to flush clean entry")
/* Remove the entry from the delayed list */
- H5F_DC_REMOVE(dl_entry, f->shared->dl_head_ptr, f->shared->dl_tail_ptr, f->shared->dl_len)
+ H5F_DC_REMOVE(dl_entry, f->shared->dl_head_ptr, \
+ f->shared->dl_tail_ptr, f->shared->dl_len)
/* Free the delayed entry struct */
H5FL_FREE(H5F_vfd_swmr_dl_entry_t, dl_entry);
- } else
+
+ } else {
+
break;
+ }
+
dl_entry = prev;
+
} /* end while */
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* end H5F_update_vfd_swmr_metadata_file() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5F_vfd_swmr_writer__delay_write
+ *
+ * Purpose:  Given the base address of a page of metadata, or of a multi-
+ *           page metadata entry, determine whether the write must be
+ *           delayed.
+ *
+ *           At the conceptual level, the VFD SWMR writer must delay the
+ *           write of any metadata page or multi-page metadata entry that
+ *           overwrites an existing metadata page or multi-page metadata
+ *           entry until it has appeared in the metadata file index for
+ *           at least max_lag ticks.  Since the VFD SWMR reader goes
+ *           to the HDF5 file for any piece of metadata not listed in
+ *           the metadata file index, failure to delay such writes can
+ *           result in "message from the future" bugs.
+ *
+ *           The easy case is pages or multi-page metadata entries that
+ *           have just been allocated.  Obviously, these can be written
+ *           immediately.  This case is tracked and tested by the page
+ *           buffer proper.
+ *
+ *           This routine looks up the supplied page in the metadata file
+ *           index (f->shared->mdf_idx) via binary search on the
+ *           hdf5_page_offset field.
+ *
+ *           If the entry doesn't exist, the function sets
+ *           *delay_write_until_ptr to the current tick plus max_lag.
+ *
+ *           If the entry exists, the function sets *delay_write_until_ptr
+ *           equal to the entry's delayed_flush field if it is greater than
+ *           or equal to the current tick, or zero otherwise.
+ *
+ *           A non-zero result that falls outside the closed interval
+ *           [tick_num, tick_num + max_lag] is treated as an error, and
+ *           *delay_write_until_ptr is left untouched.
+ *
+ *           f:                     file whose shared struct holds the
+ *                                  metadata file index, the current tick,
+ *                                  and the VFD SWMR configuration.
+ *
+ *           page:                  page offset (in the HDF5 file) of the
+ *                                  metadata page or multi-page entry.
+ *
+ *           delay_write_until_ptr: receives the computed tick, or zero
+ *                                  if no delay is required.
+ *
+ * Return:   SUCCEED/FAIL
+ *
+ * Programmer: John Mainzer 11/4/18
+ *
+ * Changes:  None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5F_vfd_swmr_writer__delay_write(H5F_t *f, uint64_t page,
+    uint64_t * delay_write_until_ptr)
+{
+    int32_t top = -1;
+    int32_t bottom = 0;
+    int32_t probe;
+    uint64_t delay_write_until = 0;
+    H5FD_vfd_swmr_idx_entry_t * ie_ptr = NULL;
+    H5FD_vfd_swmr_idx_entry_t * idx = NULL;
+    herr_t ret_value = SUCCEED;              /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->vfd_swmr);
+    HDassert(f->shared->vfd_swmr_writer);
+    HDassert(delay_write_until_ptr);
+
+    idx = f->shared->mdf_idx;
+
+    /* The index may only be absent before the first tick has elapsed */
+    HDassert((idx) ||( f->shared->tick_num <= 0));
+
+    /* do a binary search on the metadata file index to see if
+     * it already contains an entry for the supplied page.
+     *
+     * NOTE(review): this presumes the index is sorted by increasing
+     * hdf5_page_offset -- confirm against the code that maintains
+     * f->shared->mdf_idx.
+     */
+    if ( idx ) {
+
+        top = f->shared->mdf_idx_entries_used - 1;
+        bottom = 0;
+    }
+
+    /* If there is no index, top is still -1 and the loop is skipped */
+    while ( top >= bottom ) {
+
+        HDassert(idx);
+
+        /* Midpoint of [bottom, top].  The previous expression,
+         * "top + bottom / 2", parsed as top + (bottom / 2) and could
+         * yield a probe beyond top -- i.e. an out of bounds read of
+         * the index.  This form also avoids overflow in the sum.
+         */
+        probe = bottom + (top - bottom) / 2;
+
+        if ( idx[probe].hdf5_page_offset < page ) {
+
+            bottom = probe + 1;
+
+        } else if ( idx[probe].hdf5_page_offset > page ) {
+
+            top = probe - 1;
+
+        } else { /* found it */
+
+            ie_ptr = idx + probe;
+            break;
+        }
+    }
+
+    if ( ie_ptr ) {
+
+        /* Existing entry: honor its delayed_flush tick only if that
+         * tick has not already passed; otherwise no delay (zero).
+         */
+        if ( ie_ptr->delayed_flush >= f->shared->tick_num ) {
+
+            delay_write_until = ie_ptr->delayed_flush;
+        }
+    } else {
+
+        /* Not in the index: delay for a full max_lag ticks */
+        delay_write_until = f->shared->tick_num +
+                            f->shared->vfd_swmr_config.max_lag;
+    }
+
+    /* Sanity check: a non-zero delay must lie in
+     * [tick_num, tick_num + max_lag].
+     */
+    if ( ( delay_write_until != 0 ) &&
+         ( ! ( ( delay_write_until >= f->shared->tick_num ) &&
+               ( delay_write_until <=
+                 (f->shared->tick_num + f->shared->vfd_swmr_config.max_lag) )
+             )
+         )
+       )
+        HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+                    "VFD SWMR write delay out of range")
+
+    *delay_write_until_ptr = delay_write_until;
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5F_vfd_swmr_writer__delay_write() */
+
+
/*-------------------------------------------------------------------------
+ *
* Function: H5F_vfd_swmr_writer_end_of_tick
*
- * Purpose: Dummy right now
+ * Purpose: Main routine for managing the end of tick for the VFD
+ * SWMR writer.
+ *
+ * This function performs all end of tick operations for the
+ * writer -- specifically:
+ *
+ * 1) If requested, flush all raw data to the HDF5 file.
+ *
+ * (Not for first cut.)
+ *
+ * 2) Flush the metadata cache to the page buffer.
+ *
+ * 3) If this is the first tick (i.e. tick == 0), create the
+ * in memory version of the metadata file index.
+ *
+ * 4) Scan the page buffer tick list, and use it to update
+ * the metadata file index, adding or modifying entries as
+ * appropriate.
+ *
+ * 5) Scan the metadata file index for entries that can be
+ * removed -- specifically entries that have been written
+ * to the HDF5 file more than max_lag ticks ago, and haven't
+ * been modified since.
+ *
+ * (This is an optimization -- address it later)
+ *
+ * 6) Scan the page buffer delayed write list for entries that
+ * may now be written, and move any such entries to the
+ * page buffer LRU.
+ *
+ * (For the first cut, we will assume file was just created,
+ * that there have been no flushes, and that no entries
+ * have been removed from the metadata file index. Under
+ * these circumstances, the delayed write list must always
+ * be empty. Thus delay implementing this.)
+ *
+ * 7) Update the metadata file. Must do this before we
+ * release the tick list, as otherwise the page buffer
+ * entry images may not be available.
+ *
+ * 8) Release the page buffer tick list.
+ *
+ * 9) Release any delayed writes whose delay has expired.
+ *
+ * 10) Increment the tick, and update the end of tick.
+ *
+ * In passing, generate log entries as appropriate.
*
* Return: SUCCEED/FAIL
*
+ * Programmer: John Mainzer 11/4/18
+ *
+ * Changes: None.
+ *
*-------------------------------------------------------------------------
*/
herr_t
H5F_vfd_swmr_writer_end_of_tick(void)
{
+    int32_t idx_entries_added = 0;
+    int32_t idx_entries_modified = 0;
+    int32_t idx_ent_not_in_tl = 0;
+    int32_t idx_ent_not_in_tl_flushed = 0;
+    H5F_t * f;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
- if(vfd_swmr_file_g) {
+    /* the writer's file is tracked in a global */
+    f = vfd_swmr_file_g;
+
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->pb_ptr);
+    HDassert(f->shared->vfd_swmr_writer);
+
+    /* 1) If requested, flush all raw data to the HDF5 file.
+     *
+     *    (Not for first cut.)
+     */
+    if ( f->shared->vfd_swmr_config.flush_raw_data ) {
+
+        HDassert(FALSE);
+    }
+
+
+    /* 2) Flush the metadata cache to the page buffer. */
+    if ( H5AC_flush(f) < 0 )
+
+        HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
+                    "Can't flush metadata cache to the page buffer")
+
+
+    /* 3) If this is the first tick (i.e. tick == 0), create the
+     *    in memory version of the metadata file index.
+     *
+     *    This test must have its own error body: without one, the
+     *    index update in step 4 becomes the body of this if statement
+     *    and is skipped on every tick on which index creation is not
+     *    attempted or succeeds.
+     */
+    if ( ( f->shared->tick_num == 0 ) &&
+         ( H5F__vfd_swmr_writer__create_index(f) < 0 ) )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, \
+                    "unable to allocate metadata file index")
+
+
+    /* 4) Scan the page buffer tick list, and use it to update
+     *    the metadata file index, adding or modifying entries as
+     *    appropriate.
+     */
+    if ( H5PB_vfd_swmr__update_index(f, &idx_entries_added,
+                                     &idx_entries_modified,
+                                     &idx_ent_not_in_tl,
+                                     &idx_ent_not_in_tl_flushed) < 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, "can't update MD file index")
+
+
+    /* 5) Scan the metadata file index for entries that can be
+     *    removed -- specifically entries that have been written
+     *    to the HDF5 file more than max_lag ticks ago, and haven't
+     *    been modified since.
+     *
+     *    (This is an optimization -- address it later)
+     */
+
+
+    /* 6) Scan the page buffer delayed write list for entries that
+     *    may now be written, and move any such entries to the
+     *    page buffer LRU.
+     *
+     *    (For the first cut, we will assume file was just created,
+     *    that there have been no flushes, and that no entries
+     *    have been removed from the metadata file index. Under
+     *    these circumstances, the delayed write list must always
+     *    be empty. Thus delay implementing this.)
+     */
+    HDassert( f->shared->pb_ptr->dwl_len == 0 );
+
+
+    /* 7) Update the metadata file. Must do this before we
+     *    release the tick list, as otherwise the page buffer
+     *    entry images may not be available.
+     *
+     *    Note that this operation will restore the index to
+     *    sorted order.
+     */
+    if ( H5F_update_vfd_swmr_metadata_file(f,
+            (uint32_t)(f->shared->mdf_idx_entries_used + idx_entries_added),
+            f->shared->mdf_idx) < 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, "can't update MD file")
+
+    /* at this point the metadata file index should be sorted -- update
+     * f->shared->mdf_idx_entries_used.
+     */
+    f->shared->mdf_idx_entries_used += idx_entries_added;
+
+    HDassert(f->shared->mdf_idx_entries_used <= f->shared->mdf_idx_len);
+
+
+    /* 8) Release the page buffer tick list. */
+    if ( H5PB_vfd_swmr__release_tick_list(f) < 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, "can't release tick list")
+
+
+    /* 9) Release any delayed writes whose delay has expired */
+    if ( H5PB_vfd_swmr__release_delayed_writes(f) < 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, "can't release delayed writes")
+
+
+    /* 10) Increment the tick, and update the end of tick. */
+    if( vfd_swmr_file_g ) {
+
/* Update end_of_tick */
- if(H5F__vfd_swmr_update_end_of_tick_and_tick_num(vfd_swmr_file_g, TRUE) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, "unable to update end of tick")
+        if ( H5F__vfd_swmr_update_end_of_tick_and_tick_num(vfd_swmr_file_g,
+                                                           TRUE) < 0 )
+
+            HGOTO_ERROR(H5E_FILE, H5E_CANTSET, FAIL, \
+                        "unable to update end of tick")
}
done:
+
FUNC_LEAVE_NOAPI(ret_value)
+
} /* end H5F_vfd_swmr_writer_end_of_tick() */
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5F__vfd_swmr_writer__create_index
+ *
+ * Purpose: Allocate and initialize the index for the VFD SWMR metadata
+ * file.
+ *
+ * In the first cut at VFD SWMR, the index is of fixed size,
+ * as specified by the md_pages_reserved field of the VFD
+ * SWMR configuration. If we exceed this size we will simply
+ * abort. Needless to say, this will have to change in the
+ * production version, but it is good enough for the working
+ * prototype.
+ *
+ * Return: SUCCEED/FAIL
+ *
+ * Programmer: John Mainzer 11/5/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5F__vfd_swmr_writer__create_index(H5F_t * f)
+{
+    int i;
+    size_t bytes_reserved;
+    size_t bytes_available;
+    int32_t entries_in_index;
+    size_t index_size;
+    H5FD_vfd_swmr_idx_entry_t * index = NULL;
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* operate on the file supplied by the caller -- the parameter was
+     * formerly overwritten with the global vfd_swmr_file_g, which made
+     * the argument meaningless.
+     */
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->vfd_swmr_writer);
+    HDassert(f->shared->mdf_idx == NULL);
+    HDassert(f->shared->mdf_idx_len == 0);
+    HDassert(f->shared->mdf_idx_entries_used == 0);
+
+    /* md_pages_reserved is signed -- reject non-positive values before
+     * they are converted to size_t.
+     */
+    if ( f->shared->vfd_swmr_config.md_pages_reserved <= 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, \
+                    "md_pages_reserved must be positive")
+
+    bytes_reserved = (size_t)f->shared->fs_page_size *
+                     (size_t)(f->shared->vfd_swmr_config.md_pages_reserved);
+
+    /* the subtraction below is unsigned, so a reservation no larger than
+     * the header would wrap around to a huge value instead of going
+     * negative -- test before subtracting.
+     */
+    if ( bytes_reserved <= (size_t)H5FD_MD_HEADER_SIZE )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, \
+                    "reserved md file space too small for index")
+
+    bytes_available = bytes_reserved - (size_t)H5FD_MD_HEADER_SIZE;
+
+    HDassert(bytes_available > 0);
+
+    /* the index is of fixed size: as many entries as fit in the space
+     * left after the header.
+     */
+    entries_in_index = (int32_t)(bytes_available / H5FD_MD_INDEX_ENTRY_SIZE);
+
+    if ( entries_in_index <= 0 )
+
+        HGOTO_ERROR(H5E_FILE, H5E_SYSTEM, FAIL, \
+                    "no room for any md index entries")
+
+    index_size = sizeof(H5FD_vfd_swmr_idx_entry_t) * (size_t)entries_in_index;
+    index = (H5FD_vfd_swmr_idx_entry_t *)HDmalloc(index_size);
+
+    if ( index == NULL )
+
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, \
+                    "memory allocation failed for md index")
+
+    /* initialize every entry to the empty state */
+    for ( i = 0; i < entries_in_index; i++ ) {
+
+        index[i].hdf5_page_offset = 0;
+        index[i].md_file_page_offset = 0;
+        index[i].length = 0;
+        index[i].chksum = 0;
+        index[i].entry_ptr = NULL;
+        index[i].tick_of_last_change = 0;
+        index[i].clean = FALSE;
+        index[i].tick_of_last_flush = 0;
+        index[i].delayed_flush = 0;
+        index[i].moved_to_hdf5_file = FALSE;
+    }
+
+    /* publish the new (empty) index in the shared file struct */
+    f->shared->mdf_idx = index;
+    f->shared->mdf_idx_len = entries_in_index;
+    f->shared->mdf_idx_entries_used = 0;
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5F__vfd_swmr_writer__create_index() */
+
/*-------------------------------------------------------------------------
* Function: H5F_vfd_swmr_reader_end_of_tick
*
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index 9e523de..d725f77 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -35,6 +35,7 @@
/* Other private headers needed by this file */
#include "H5private.h" /* Generic Functions */
#include "H5ACprivate.h" /* Metadata cache */
+#include "H5FDprivate.h" /* VFD -- for VFD SWMR */
#include "H5FLprivate.h" /* Free Lists */
#include "H5FOprivate.h" /* File objects */
#include "H5FSprivate.h" /* File free space */
@@ -381,24 +382,69 @@ struct H5F_file_t {
/* VFD SWMR */
/* Configuration info */
- H5F_vfd_swmr_config_t vfd_swmr_config; /* Copy of the VFD SWMR configuration from the
- FAPL used to open the file */
- hbool_t vfd_swmr; /* The file is opened with VFD SWMR configured or not*/
- hbool_t vfd_swmr_writer; /* This is the VFD SWMR writer or not */
+ H5F_vfd_swmr_config_t vfd_swmr_config; /* Copy of the VFD SWMR
+ * configuration from the
+ * FAPL used to open the file
+ */
+ hbool_t vfd_swmr; /* The file is opened with VFD
+ * SWMR configured or not
+ */
+ hbool_t vfd_swmr_writer; /* This is the VFD SWMR writer or
+ * not
+ */
uint64_t tick_num; /* Number of the current tick */
struct timespec end_of_tick; /* End time of the current tick */
+ /* VFD SWMR metadata file index */
+ H5FD_vfd_swmr_idx_entry_t * mdf_idx; /* pointer to an array of instance
+ * of H5FD_vfd_swmr_idx_entry_t of
+ * length mdf_idx_len. This array
+ * is used by the vfd swmr writer
+ * to assemble the metadata file
+ * index at the end of each tick,
+ * and by the vfd swmr readers to
+ * track changes in the index.
+ * With one brief exception during
+ * writer end of tick processing,
+ * this index will always be sorted
+ * in increasing HDF5 file page
+ * offset order.
+ *
+ * This field should be NULL unless
+ * the index is defined.
+ */
+ int32_t mdf_idx_len; /* number of entries in the array
+ * of instances of
+ * H5FD_vfd_swmr_idx_entry_t pointed
+ * to by mdf_idx above. Note that
+ * not all entries in the index
+ * need be used.
+ */
+ int32_t mdf_idx_entries_used; /* Number of entries in *mdf_idx
+ * that are in use -- these will
+ * be contiguous at indices 0
+ * through mdf_idx_entries_used - 1.
+ */
+
/* Metadata file for VFD SWMR writer */
- int vfd_swmr_md_fd; /* POSIX: file descriptor for the metadata file */
- haddr_t vfd_swmr_md_eoa; /* POSIX: eoa for the metadata file */
+ int vfd_swmr_md_fd; /* POSIX: file descriptor for the
+ * metadata file
+ */
+ haddr_t vfd_swmr_md_eoa; /* POSIX: eoa for the metadata
+ * file
+ */
/* Free space manager for the metadata file */
H5FS_t *fs_man_md; /* Free-space manager */
- H5F_fs_state_t fs_state_md; /* State of the free space manager */
+ H5F_fs_state_t fs_state_md; /* State of the free space
+ * manager
+ */
/* Delayed free space release doubly linked list */
uint32_t dl_len; /* # of entries in the list */
- H5F_vfd_swmr_dl_entry_t *dl_head_ptr; /* Points to the beginning of the list */
+ H5F_vfd_swmr_dl_entry_t *dl_head_ptr; /* Points to the beginning of
+ * the list
+ */
H5F_vfd_swmr_dl_entry_t *dl_tail_ptr; /* Points to the end of the list */
};
diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h
index dc407c8..cad92fa 100644
--- a/src/H5Fprivate.h
+++ b/src/H5Fprivate.h
@@ -879,6 +879,8 @@ H5_DLL herr_t H5F_cwfs_remove_heap(H5F_file_t *shared, struct H5HG_heap_t *heap)
H5_DLL herr_t H5F_debug(H5F_t *f, FILE * stream, int indent, int fwidth);
/* VFD SWMR */
+H5_DLL herr_t H5F_vfd_swmr_writer__delay_write(H5F_t *f, uint64_t page,
+ uint64_t * delay_write_until_ptr);
H5_DLL herr_t H5F_vfd_swmr_writer_end_of_tick(void);
H5_DLL herr_t H5F_vfd_swmr_reader_end_of_tick(void);
H5_DLL herr_t H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t index_len, struct H5FD_vfd_swmr_idx_entry_t *index);
diff --git a/src/H5Fpublic.h b/src/H5Fpublic.h
index 9c47098..c2bfb21 100644
--- a/src/H5Fpublic.h
+++ b/src/H5Fpublic.h
@@ -222,8 +222,8 @@ typedef herr_t (*H5F_flush_cb_t)(hid_t object_id, void *udata);
#define H5F__MAX_VFD_SWMR_FILE_NAME_LEN 1024
typedef struct H5F_vfd_swmr_config_t {
int32_t version;
- int32_t tick_len;
- int32_t max_lag;
+ uint32_t tick_len;
+ uint32_t max_lag;
hbool_t vfd_swmr_writer;/****/
hbool_t flush_raw_data;
int32_t md_pages_reserved;
diff --git a/src/H5PB.c b/src/H5PB.c
index c89c381..a3cac89 100644
--- a/src/H5PB.c
+++ b/src/H5PB.c
@@ -97,7 +97,7 @@ static herr_t H5PB__make_space(H5F_t *f, H5PB_t *pb_ptr,
static herr_t H5PB__mark_entry_clean(H5PB_t *pb_ptr,
H5PB_entry_t *entry_ptr);
-static herr_t H5PB__mark_entry_dirty(H5PB_t *pb_ptr,
+static herr_t H5PB__mark_entry_dirty(H5F_t * f, H5PB_t *pb_ptr,
H5PB_entry_t *entry_ptr);
static herr_t H5PB__read_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr,
@@ -188,6 +188,10 @@ H5PB_reset_stats(H5PB_t *pb_ptr)
pb_ptr->failed_ht_searches = 0;
pb_ptr->total_failed_ht_search_depth = 0;
pb_ptr->max_index_len = 0;
+ pb_ptr->max_clean_index_len = 0;
+ pb_ptr->max_dirty_index_len = 0;
+ pb_ptr->max_clean_index_size = 0;
+ pb_ptr->max_dirty_index_size = 0;
pb_ptr->max_index_size = 0;
pb_ptr->max_rd_pages = 0;
pb_ptr->max_md_pages = 0;
@@ -389,10 +393,17 @@ H5PB_print_stats(const H5PB_t *pb_ptr)
* buffer is configured to allow pages of the specified
* type.
*
- * This function is called by the
- * from the MF layer when a new page is allocated to
- * indicate to the page buffer layer that a read of the page
- * from the file is not necessary since it's an empty page.
+ * This function is called by the MF layer when a new page
+ * is allocated to indicate to the page buffer layer that
+ * a read of the page from the file is not necessary since
+ * it's an empty page.
+ *
+ * For purposes of the VFD SWMR writer, we also track pages
+ * that are inserted via this call, as the fact that the
+ * page was allocated implies that an earlier version does
+ * not exist in the HDF5 file, and thus we need not concern
+ * ourselves with delaying the write of this page to avoid
+ * messages from the future on the reader.
*
* Note that this function inserts the new page without
* attempting to make space. This can result in the page
@@ -452,6 +463,9 @@ H5PB_add_new_page(H5F_t *f, H5FD_mem_t type, haddr_t page_addr)
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"new page buffer page creation failed.")
+ /* make note that this page was allocated, not loaded from file */
+ entry_ptr->loaded = FALSE;
+
/* updates stats */
H5PB__UPDATE_STATS_FOR_INSERTION(pb_ptr, entry_ptr);
}
@@ -538,53 +552,61 @@ H5PB_create(H5F_t *f, size_t size, unsigned page_buf_min_meta_perc,
/* initialize the new instance of H5PB_t */
- pb_ptr->magic = H5PB__H5PB_T_MAGIC;
- pb_ptr->page_size = f->shared->fs_page_size;
+ pb_ptr->magic = H5PB__H5PB_T_MAGIC;
+ pb_ptr->page_size = f->shared->fs_page_size;
H5_CHECKED_ASSIGN(pb_ptr->page_size, size_t, \
f->shared->fs_page_size, hsize_t);
- pb_ptr->max_pages = (int32_t)(size / f->shared->fs_page_size);
- pb_ptr->curr_pages = 0;
- pb_ptr->curr_md_pages = 0;
- pb_ptr->curr_rd_pages = 0;
- pb_ptr->min_md_pages = min_md_pages;
- pb_ptr->min_rd_pages = min_rd_pages;
+ pb_ptr->max_pages = (int32_t)(size / f->shared->fs_page_size);
+ pb_ptr->curr_pages = 0;
+ pb_ptr->curr_md_pages = 0;
+ pb_ptr->curr_rd_pages = 0;
+ pb_ptr->min_md_pages = min_md_pages;
+ pb_ptr->min_rd_pages = min_rd_pages;
- pb_ptr->max_size = size;
- pb_ptr->min_meta_perc = page_buf_min_meta_perc;
- pb_ptr->min_raw_perc = page_buf_min_raw_perc;
+ pb_ptr->max_size = size;
+ pb_ptr->min_meta_perc = page_buf_min_meta_perc;
+ pb_ptr->min_raw_perc = page_buf_min_raw_perc;
/* index */
for ( i = 0; i < H5PB__HASH_TABLE_LEN; i++ )
- pb_ptr->ht[i] = NULL;
- pb_ptr->index_len = 0;
- pb_ptr->index_size = 0;
+ pb_ptr->ht[i] = NULL;
+ pb_ptr->index_len = 0;
+ pb_ptr->clean_index_len = 0;
+ pb_ptr->dirty_index_len = 0;
+ pb_ptr->index_size = 0;
+ pb_ptr->clean_index_size = 0;
+ pb_ptr->dirty_index_size = 0;
+ pb_ptr->il_len = 0;
+ pb_ptr->il_size = 0;
+ pb_ptr->il_head = NULL;
+ pb_ptr->il_tail = NULL;
/* LRU */
- pb_ptr->LRU_len = 0;
- pb_ptr->LRU_size = 0;
- pb_ptr->LRU_head_ptr = NULL;
- pb_ptr->LRU_tail_ptr = NULL;
+ pb_ptr->LRU_len = 0;
+ pb_ptr->LRU_size = 0;
+ pb_ptr->LRU_head_ptr = NULL;
+ pb_ptr->LRU_tail_ptr = NULL;
/* VFD SWMR specific fields.
* The following fields are defined iff vfd_swmr_writer is TRUE.
*/
- pb_ptr->vfd_swmr_writer = FALSE;
- pb_ptr->mpmde_count = 0;
- pb_ptr->cur_tick = 0;
+ pb_ptr->vfd_swmr_writer = FALSE;
+ pb_ptr->mpmde_count = 0;
+ pb_ptr->cur_tick = 0;
/* delayed write list */
- pb_ptr->max_delay = 0;
- pb_ptr->dwl_len = 0;
- pb_ptr->dwl_size = 0;
- pb_ptr->dwl_head_ptr = NULL;
- pb_ptr->dwl_tail_ptr = NULL;
+ pb_ptr->max_delay = 0;
+ pb_ptr->dwl_len = 0;
+ pb_ptr->dwl_size = 0;
+ pb_ptr->dwl_head_ptr = NULL;
+ pb_ptr->dwl_tail_ptr = NULL;
/* tick list */
- pb_ptr->tl_len = 0;
- pb_ptr->tl_size = 0;
- pb_ptr->tl_head_ptr = NULL;
- pb_ptr->tl_tail_ptr = NULL;
+ pb_ptr->tl_len = 0;
+ pb_ptr->tl_size = 0;
+ pb_ptr->tl_head_ptr = NULL;
+ pb_ptr->tl_tail_ptr = NULL;
H5PB_reset_stats(pb_ptr);
@@ -1232,6 +1254,453 @@ done:
/*-------------------------------------------------------------------------
*
+ * Function: H5PB_vfd_swmr__release_delayed_writes
+ *
+ * Purpose: After the tick list has been released, and before the
+ * beginning of the next tick, we must scan the delayed
+ * write list, and release those entries whose delays have
+ * expired.
+ *
+ * Note that pages of metadata, and multi-page metadata entries
+ * are handled differently.
+ *
+ * Regular pages are removed from the delayed write list and
+ * inserted in the replacement policy
+ *
+ * In contrast, multi-page metadata entries are simply
+ * flushed and evicted.
+ *
+ * Since the delayed write list is sorted in decreasing
+ * delay_write_until order, we start our scan at the bottom
+ * of the delayed write list and continue upwards until no
+ * expired entries remain.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 11/15/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PB_vfd_swmr__release_delayed_writes(H5F_t * f)
+{
+    H5PB_t * pb_ptr = NULL;
+    H5PB_entry_t *entry_ptr = NULL;
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* Sanity checks */
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->vfd_swmr);
+    HDassert(f->shared->vfd_swmr_writer);
+
+    pb_ptr = f->shared->pb_ptr;
+
+    HDassert(pb_ptr);
+    HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+    HDassert(pb_ptr->vfd_swmr_writer);
+
+    /* Work upwards from the tail of the delayed write list, stopping at
+     * the first entry whose delay has not yet expired (or when the list
+     * is exhausted).
+     */
+    for ( ; ; ) {
+
+        entry_ptr = pb_ptr->dwl_tail_ptr;
+
+        if ( NULL == entry_ptr )
+            break;
+
+        if ( entry_ptr->delay_write_until >= f->shared->tick_num )
+            break;
+
+        HDassert(entry_ptr->is_dirty);
+
+        H5PB__REMOVE_FROM_DWL(pb_ptr, entry_ptr, FAIL)
+
+        entry_ptr->delay_write_until = 0;
+
+        if ( ! ( entry_ptr->is_mpmde ) ) {
+
+            /* regular page -- hand it over to the replacement policy */
+            H5PB__UPDATE_RP_FOR_INSERT_APPEND(pb_ptr, entry_ptr, FAIL)
+
+            continue;
+        }
+
+        /* multi-page metadata entry -- flush and evict now */
+        if ( H5PB__flush_entry(f, pb_ptr, entry_ptr) < 0 )
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+                        "flush of mpmde failed")
+
+        if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+                        "eviction of mpmde failed")
+    }
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB_vfd_swmr__release_delayed_writes() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB_vfd_swmr__release_tick_list
+ *
+ * Purpose: After the metadata file has been updated, and before the
+ * beginning of the next tick, we must release the tick list.
+ *
+ * This function performs that release.
+ *
+ * In passing, flush and evict any multi-page metadata entries
+ * that are not subject to a delayed write.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 11/12/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PB_vfd_swmr__release_tick_list(H5F_t * f)
+{
+    H5PB_t * pb_ptr = NULL;
+    H5PB_entry_t *entry_ptr = NULL;
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* Sanity checks */
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->vfd_swmr);
+    HDassert(f->shared->vfd_swmr_writer);
+
+    pb_ptr = f->shared->pb_ptr;
+
+    HDassert(pb_ptr);
+    HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+    HDassert(pb_ptr->vfd_swmr_writer);
+
+    /* Drain the tick list from the head until it is empty. */
+    for ( entry_ptr = pb_ptr->tl_head_ptr;
+          entry_ptr != NULL;
+          entry_ptr = pb_ptr->tl_head_ptr ) {
+
+        H5PB__REMOVE_FROM_TL(pb_ptr, entry_ptr, FAIL)
+
+        entry_ptr->modified_this_tick = FALSE;
+
+        /* An entry that is not a multi-page metadata entry must already
+         * be on either the replacement policy or the delayed write list.
+         * In either case, it will be flushed when possible and necessary.
+         */
+        if ( ! ( entry_ptr->is_mpmde ) )
+            continue;
+
+        HDassert(entry_ptr->is_dirty);
+
+        /* a multi-page metadata entry subject to a delayed write is
+         * left alone here.
+         */
+        if ( entry_ptr->delay_write_until != 0 )
+            continue;
+
+        /* flush and evict the multi-page metadata entry immediately */
+        if ( H5PB__flush_entry(f, pb_ptr, entry_ptr) < 0 )
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_WRITEERROR, FAIL, \
+                        "flush of mpmde failed")
+
+        if ( H5PB__evict_entry(pb_ptr, entry_ptr, TRUE) < 0 )
+
+            HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+                        "eviction of mpmde failed")
+    }
+
+    /* the tick list must now be completely empty */
+    HDassert(pb_ptr->tl_head_ptr == NULL);
+    HDassert(pb_ptr->tl_tail_ptr == NULL);
+    HDassert(pb_ptr->tl_len == 0);
+    HDassert(pb_ptr->tl_size == 0);
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB_vfd_swmr__release_tick_list */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5PB_vfd_swmr__update_index
+ *
+ * Purpose: In the VFD SWMR writer, all metadata writes to the page
+ * buffer during a tick are buffered in the page buffer in
+ * the tick list. Further, the metadata cache is flushed
+ * to the page buffer at the end of the tick so that all
+ * metadata changes during the tick are reflected in the
+ * tick list.
+ *
+ * Once this is done, the internal representation of the
+ * metadata file index must be updated from the tick list
+ * so that the metadata file can be updated, and the tick
+ * list can be emptied and prepared to buffer metadata changes
+ * in the next tick.
+ *
+ * This function is called to accomplish this. Its cycle of
+ * operation is as follows:
+ *
+ * 1) Scan the tick list. For each entry (*pbe_ptr), test
+ * to see if it appears in the index.
+ *
+ * If it does the entry must have been modified in the
+ * past tick. Update the index entry (*ie_ptr) as follows:
+ *
+ * a) Set ie_ptr->entry_ptr = pbe_ptr->image_ptr. This
+ * is needed to give the metadata file update code
+ * access to the image of the target page or multi-page
+ * metadata entry. Note that ie_ptr->entry_ptr will
+ * be set to NULL as soon as the metadata file is updated,
+ * so the buffer pointed to by pbe_ptr->image_ptr can
+ * be safely discarded at any time after the metadata
+ * file update.
+ *
+ * b) Set ie_ptr->tick_of_last_change to the current tick.
+ *
+ * c) If pbe_ptr->is_dirty, set ie_ptr->clean to FALSE.
+ * If pbe_ptr->is_dirty is FALSE, set ie_ptr->clean
+ * to TRUE and set ie_ptr->tick_of_last_flush to the
+ * current tick.
+ *
+ * If the tick list entry (*pbe_ptr) doesn't appear in
+ * the index, allocate a metadata file index entry (*ie_ptr),
+ * and initialize it as follows:
+ *
+ * ie_ptr->hdf5_page_offset = pbe_ptr->page
+ * ie_ptr->length = pbe_ptr->size
+ * ie_ptr->delayed_flush = pbe_ptr->delay_write_until
+ *
+ * and then update the new entry as per the existing entry
+ * case described above.
+ *
+ * 2) Scan the internal representation of the metadata file
+ * index for entries that do not appear in the tick list.
+ * For each such entry (*ie_ptr), proceed as follows:
+ *
+ * 1) If ie_ptr->clean, we are done -- proceed to the
+ * next index entry that doesn't appear in the tick list.
+ *
+ * 2) Test to see if the cognate entry appears in the page
+ * buffer. If it doesn't, it must have been flushed and
+ * evicted in the past tick. Set
+ *
+ * ie_ptr->clean = TRUE, and
+ *
+ * ie_ptr->tick_of_last_flush = current tick
+ *
+ * and proceed to the next index entry that doesn't
+ * appear in the tick list.
+ *
+ * 3) If the cognate entry does appear in the page buffer
+ * and is clean, proceed as per 2) above.
+ *
+ * 4) In all other cases, do nothing, and proceed to the
+ * next index entry that does not appear in the tick list.
+ *
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Programmer: John Mainzer -- 11/9/18
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5PB_vfd_swmr__update_index(H5F_t * f,
+                            int * idx_ent_added_ptr,
+                            int * idx_ent_modified_ptr,
+                            int * idx_ent_not_in_tl_ptr,
+                            int * idx_ent_not_in_tl_flushed_ptr)
+{
+    int32_t i;
+    int32_t idx_ent_added = 0;
+    int32_t idx_ent_modified = 0;
+    int32_t idx_ent_not_in_tl = 0;
+    int32_t idx_ent_not_in_tl_flushed = 0;
+    H5PB_t * pb_ptr = NULL;
+    H5PB_entry_t *pbe_ptr = NULL;
+    H5FD_vfd_swmr_idx_entry_t * ie_ptr = NULL;
+    H5FD_vfd_swmr_idx_entry_t * idx = NULL;
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* Sanity checks */
+    HDassert(f);
+    HDassert(f->shared);
+    HDassert(f->shared->vfd_swmr);
+    HDassert(f->shared->vfd_swmr_writer);
+
+    idx = f->shared->mdf_idx;
+
+    HDassert(idx);
+
+    pb_ptr = f->shared->pb_ptr;
+
+    HDassert(pb_ptr);
+    HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
+    HDassert(pb_ptr->vfd_swmr_writer);
+
+    HDassert(idx_ent_added_ptr);
+    HDassert(idx_ent_modified_ptr);
+    HDassert(idx_ent_not_in_tl_ptr);
+    HDassert(idx_ent_not_in_tl_flushed_ptr);
+
+    /* scan the tick list and insert or update metadata file index entries
+     * as appropriate.
+     */
+
+    pbe_ptr = pb_ptr->tl_head_ptr;
+
+    while ( pbe_ptr ) {
+
+        uint64_t target_page;
+        int32_t top;
+        int32_t bottom;
+        int32_t probe;
+
+        HDassert(pbe_ptr->magic == H5PB__H5PB_ENTRY_T_MAGIC);
+
+        /* do a binary search on the metadata file index to see if
+         * it already contains an entry for *pbe_ptr.
+         *
+         * Only the first mdf_idx_entries_used entries are searched --
+         * entries appended during this call are not part of the search
+         * range.
+         */
+
+        ie_ptr = NULL;
+        top = f->shared->mdf_idx_entries_used - 1;
+        bottom = 0;
+        target_page = pbe_ptr->page;
+
+        while ( top >= bottom ) {
+
+            /* midpoint of [bottom, top].  Written as an offset from
+             * bottom so that the division applies to the width of the
+             * range (the former "top + bottom / 2" divided only bottom,
+             * and could probe beyond top), and so that the sum cannot
+             * overflow.
+             */
+            probe = bottom + (top - bottom) / 2;
+
+            if ( idx[probe].hdf5_page_offset < target_page ) {
+
+                bottom = probe + 1;
+
+            } else if ( idx[probe].hdf5_page_offset > target_page ) {
+
+                top = probe - 1;
+
+            } else { /* found it */
+
+                ie_ptr = idx + probe;
+                bottom = top + 1; /* to exit loop */
+            }
+        }
+
+        if ( ie_ptr == NULL ) { /* alloc new entry in the metadata file index*/
+
+            /* for now the metadata file index is of fixed size -- if we
+             * exceed the maximum size, just abort.
+             *
+             * Obviously, this must be fixed for the production version.
+             */
+            int32_t new_index_entry_index;
+
+            /* new entries are appended after the entries already in use */
+            new_index_entry_index = f->shared->mdf_idx_entries_used +
+                                    idx_ent_added++;
+
+            if ( new_index_entry_index >= f->shared->mdf_idx_len ) {
+
+                HDfprintf(stderr, "\n\nmax mdf index len exceeded.\n\n");
+                exit(1);
+            }
+
+            ie_ptr = idx + new_index_entry_index;
+
+            /* partial initialization of new entry -- rest done later */
+            ie_ptr->hdf5_page_offset = target_page;
+            ie_ptr->md_file_page_offset = 0; /* undefined at this point */
+            ie_ptr->length = (uint32_t)(pbe_ptr->size);
+            ie_ptr->chksum = 0; /* undefined at this point */
+            /* ie_ptr->entry_ptr initialized below */
+            /* ie_ptr->tick_of_last_change initialized below */
+            /* ie_ptr->clean initialized below */
+            /* ie_ptr->tick_of_last_flush initialized below */
+            ie_ptr->delayed_flush = pbe_ptr->delay_write_until;
+            ie_ptr->moved_to_hdf5_file = FALSE;
+
+        } else {
+
+            idx_ent_modified++;
+        }
+
+        /* verify that we have an index entry before it is dereferenced */
+        HDassert(ie_ptr);
+
+        ie_ptr->entry_ptr = pbe_ptr->image_ptr;
+        ie_ptr->tick_of_last_change = f->shared->tick_num;
+        ie_ptr->clean = !(pbe_ptr->is_dirty);
+
+        if ( ie_ptr->clean ) {
+
+            ie_ptr->tick_of_last_flush = f->shared->tick_num;
+
+        } else {
+
+            ie_ptr->tick_of_last_flush = 0;
+        }
+
+        /* advance to the next tick list entry -- without this the scan
+         * never terminates when the tick list is non-empty.
+         */
+        pbe_ptr = pbe_ptr->tl_next;
+    }
+
+    /* scan the metadata file index for entries that don't appear in the
+     * tick list.  If the index entry is dirty, and either doesn't appear
+     * in the page buffer, or is clean in the page buffer, mark the index
+     * entry clean and as having been flushed in the current tick.
+     *
+     * Entries that were in the tick list had tick_of_last_change set to
+     * the current tick above, which is how they are told apart here.
+     */
+    for ( i = 0; i < f->shared->mdf_idx_entries_used; i++ ) {
+
+        /* the pre-existing portion of the index must be strictly sorted */
+        HDassert( ( i == 0 ) ||
+                  ( idx[i - 1].hdf5_page_offset < idx[i].hdf5_page_offset ) );
+
+        if ( idx[i].tick_of_last_change < f->shared->tick_num ) {
+
+            idx_ent_not_in_tl++;
+
+            ie_ptr = idx + i;
+
+            if ( ! ( ie_ptr->clean ) ) {
+
+                H5PB__SEARCH_INDEX(pb_ptr, ie_ptr->hdf5_page_offset, \
+                                   pbe_ptr, FAIL);
+
+                if ( ( ! pbe_ptr ) || ( ! ( pbe_ptr->is_dirty ) ) ) {
+
+                    idx_ent_not_in_tl_flushed++;
+                    ie_ptr->clean = TRUE;
+                    ie_ptr->tick_of_last_flush = f->shared->tick_num;
+                }
+            }
+        }
+    }
+
+    HDassert(idx_ent_modified + idx_ent_not_in_tl ==
+             f->shared->mdf_idx_entries_used);
+
+    HDassert(idx_ent_modified + idx_ent_not_in_tl + idx_ent_added <=
+             f->shared->mdf_idx_len);
+
+    /* report the statistics back to the caller */
+    *idx_ent_added_ptr = idx_ent_added;
+    *idx_ent_modified_ptr = idx_ent_modified;
+    *idx_ent_not_in_tl_ptr = idx_ent_not_in_tl;
+    *idx_ent_not_in_tl_flushed_ptr = idx_ent_not_in_tl_flushed;
+
+done:
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5PB_vfd_swmr__update_index */
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5PB_write
*
* Purpose: Write data into the Page Buffer if practical, and to file
@@ -1275,14 +1744,17 @@ done:
* one page, and vfd_swmr_writer is TRUE, the write must
* buffered in the page buffer until the end of the tick.
*
- * Create a multi-page metadata entry in the page buffer
- * and copy the write into it. Insert the new entry in
- * the tick list.
+ * If it doesn't exist already, create a multi-page metadata
+ * entry in the page buffer and copy the write into it.
+ * Insert the new entry in the tick list if necessary.
*
* Test to see if the write of the multi-page metadata
* entry must be delayed. If so, place the entry in
- * the delayed write list. Otherwise, write the multi-page
- * metadata entry to the HDF5 file.
+ * the delayed write list. Otherwise, the multi-page
+ * metadata entry will be written to the HDF5 file and
+ * evicted when the tick list is released at the end of the
+ * tick.
+ *
*
* 8) If the write is of metadata, and the write is of size
* less than or equal to the page size, write the data
@@ -1329,7 +1801,7 @@ H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDassert(pb_ptr->magic == H5PB__H5PB_T_MAGIC);
- if ( H5FD_MEM_DRAW == type ) { /* raw data read */
+ if ( H5FD_MEM_DRAW == type ) { /* raw data write */
if ( pb_ptr->min_md_pages == pb_ptr->max_pages ) {
@@ -1337,7 +1809,7 @@ H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
bypass_pb = TRUE;
}
- } else { /* metadata read */
+ } else { /* metadata write */
if ( pb_ptr->min_rd_pages == pb_ptr->max_pages ) {
@@ -1483,6 +1955,8 @@ H5PB__allocate_page(H5PB_t *pb_ptr, size_t size, hbool_t clean_image)
/* fields supporting the hash table */
entry_ptr->ht_prev = NULL;
entry_ptr->ht_next = NULL;
+ entry_ptr->il_prev = NULL;
+ entry_ptr->il_next = NULL;
/* fields supporting replacement policise */
entry_ptr->next = NULL;
@@ -1675,6 +2149,8 @@ H5PB__deallocate_page(H5PB_entry_t *entry_ptr)
HDassert(!(entry_ptr->is_dirty));
HDassert(entry_ptr->ht_next == NULL);
HDassert(entry_ptr->ht_prev == NULL);
+ HDassert(entry_ptr->il_next == NULL);
+ HDassert(entry_ptr->il_prev == NULL);
HDassert(entry_ptr->next == NULL);
HDassert(entry_ptr->prev == NULL);
HDassert(entry_ptr->tl_next == NULL);
@@ -1695,7 +2171,7 @@ H5PB__deallocate_page(H5PB_entry_t *entry_ptr)
*
* Purpose: Evict the target entry from the from the page buffer, and
* de-allocate its associated image and instance of
- * H5PB_entry_t..
+ * H5PB_entry_t.
*
* In general, entries must be clean before they can be
* evicted, and the minimum metadata and raw data limits
@@ -1835,8 +2311,7 @@ H5PB__flush_entry(H5F_t *f, H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
HDassert(entry_ptr->image_ptr);
HDassert(entry_ptr->is_dirty);
HDassert((pb_ptr->vfd_swmr_writer) || (!(entry_ptr->is_mpmde)));
- HDassert( ( ! (pb_ptr->vfd_swmr_writer) ) ||
- ( (pb_ptr->cur_tick) >= (entry_ptr->delay_write_until) ) );
+ HDassert(0 == (entry_ptr->delay_write_until));
/* Retrieve the 'eoa' for the file */
if ( HADDR_UNDEF == (eoa = H5F_get_eoa(f, entry_ptr->mem_type)) )
@@ -1898,11 +2373,14 @@ H5PB__flush_entry(H5F_t *f, H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
}
/* mark the entry clean */
- entry_ptr->is_dirty = FALSE;
+ if ( H5PB__mark_entry_clean(pb_ptr, entry_ptr) < 0 )
+
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "mark entry clean failed")
/* if the entry is on the LRU, update the replacement policy */
- if ( ! (entry_ptr->is_mpmde) ) {
+ if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
H5PB__UPDATE_RP_FOR_FLUSH(pb_ptr, entry_ptr, FAIL)
}
@@ -1929,6 +2407,14 @@ done:
* even in the VFD SWMR case, as in this context, multi-page
* metadata entries are always written in full, and they
* may only enter the page buffer as the result of a write.
+ *
+ * In the context of VFD SWMR, when a page is loaded from
+ * file, it is possible that the VFD SWMR writer must delay
+ * writes to the page to avoid the possibility of "message from
+ * the future" bugs on the VFD SWMR reader. For this reason,
+ * make note of the fact that the entry has been loaded from
+ * file, so that the necessary checks can be made when
+ * writing to the page.
*
* Return: SUCCEED if no errors are encountered, and
* FAIL otherwise.
@@ -1986,7 +2472,6 @@ H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type,
*/
skip_read = (addr >= eof);
-
/* make space in the page buffer if necessary */
if ( ( pb_ptr->curr_pages >= pb_ptr->max_pages ) &&
( H5PB__make_space(f, pb_ptr, type) < 0 ) )
@@ -2024,6 +2509,11 @@ H5PB__load_page(H5F_t *f, H5PB_t *pb_ptr, haddr_t addr, H5FD_mem_t type,
HGOTO_ERROR(H5E_PAGEBUF, H5E_READERROR, FAIL, \
"driver read request failed")
+ /* If in fact the page was read from file, make note of this fact
+ * for purposes of VFD SWMR delayed writes in the VFD SWMR writer.
+ */
+ entry_ptr->loaded = ! skip_read;
+
H5PB__UPDATE_STATS_FOR_LOAD(pb_ptr, entry_ptr)
if ( entry_ptr_ptr ) {
@@ -2233,6 +2723,10 @@ done:
* this case, the entry must be marked clean to avoid
* sanity check failures on evictions.
*
+ * While this function does update the index to reflect that
+ * the entry is now clean, it does not update the replacement
+ * policy. If this is desired, it must be done by the caller.
+ *
* Return: Non-negative on success/Negative on failure
*
* Programmer: John Mainzer -- 10/14/18
@@ -2262,13 +2756,12 @@ H5PB__mark_entry_clean(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
/* mark the entry clean */
entry_ptr->is_dirty = FALSE;
- /* delete this once we start tracking clean and dirty entry is the hash
- * table.
- */
- if ( ! (entry_ptr->is_mpmde) ) {
+ /* update the index for the entry clean */
+ H5PB__UPDATE_INDEX_FOR_ENTRY_CLEAN(pb_ptr, entry_ptr)
- H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
- }
+ /* don't update the replacement policy -- this will be done by
+ * the caller if desired.
+ */
done:
@@ -2283,14 +2776,17 @@ done:
*
* Purpose: Mark the target entry as dirty.
*
- * Under normal circumstances, the entry will be in the
- * replacement policy. In this, also update the replacement
- * policy for and access.
- *
- * If pb_ptr->vfd_swmr_writer, it is possible that the target
- * is a multi-page metadata entry. In this case, the entry
- * is not in the replacement policy, and thus the policy
- * should not be updated.
+ * If pb_ptr->vfd_swmr_writer is FALSE, the entry will be
+ * in the replacement policy. In this case, we simply mark the
+ * entry as dirty, and update the replacement policy for an
+ * access.
+ *
+ * If pb_ptr->vfd_swmr_writer is TRUE, it is possible that we
+ * must delay writes to the target page or multi-page metadata
+ * entry to avoid "message from the future" bugs on the VFD
+ * SWMR readers. In such cases we must set the
+ * delay_write_until field and insert the entry on the
+ * delayed write list instead of the replacement policy.
*
* Return: Non-negative on success/Negative on failure
*
@@ -2301,8 +2797,9 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
-H5PB__mark_entry_dirty(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
+H5PB__mark_entry_dirty(H5F_t * f, H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
{
+ uint64_t delay_write_until = 0;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
@@ -2318,14 +2815,49 @@ H5PB__mark_entry_dirty(H5PB_t *pb_ptr, H5PB_entry_t *entry_ptr)
HDassert(entry_ptr->image_ptr);
HDassert((pb_ptr->vfd_swmr_writer) || (!(entry_ptr->is_mpmde)));
- /* mark the entry dirty */
- entry_ptr->is_dirty = TRUE;
+ /* mark the entry dirty if necessary */
+ if ( ! ( entry_ptr->is_dirty ) ) {
- /* if the entry is on the LRU, update the replacement policy */
- if ( ( ! (entry_ptr->is_mpmde) ) &&
- ( entry_ptr->delay_write_until == 0 ) ) {
+ entry_ptr->is_dirty = TRUE;
+
+ H5PB__UPDATE_INDEX_FOR_ENTRY_DIRTY(pb_ptr, entry_ptr)
+
+ /* since the entry was clean, there can be no pending delayed write */
+ HDassert(entry_ptr->delay_write_until == 0);
+
+ if ( ( pb_ptr->vfd_swmr_writer ) &&
+ ( entry_ptr->loaded ) &&
+ ( H5F_vfd_swmr_writer__delay_write(f, entry_ptr->page,
+ &delay_write_until) < 0 ) )
- H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "get delayed write request failed")
+
+ if ( delay_write_until > 0 ) {
+
+ H5PB__INSERT_IN_DWL(pb_ptr, entry_ptr, FAIL)
+
+ } else if ( ! (entry_ptr->is_mpmde) ) {
+
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
+
+ } else {
+
+ /* the entry should be a multi-page metadata entry that
+ * has been modified this tick. Thus it is only on the
+ * tick list, and no action is required.
+ */
+ HDassert(entry_ptr->modified_this_tick);
+ HDassert(entry_ptr->is_mpmde);
+ HDassert(pb_ptr->vfd_swmr_writer);
+ }
+ } else if ( ( ! (entry_ptr->is_mpmde) ) &&
+ ( entry_ptr->delay_write_until == 0 ) ) {
+
+ /* the entry is dirty and on the replacement policy -- just update
+ * the replacement policy for an access
+ */
+ H5PB__UPDATE_RP_FOR_ACCESS(pb_ptr, entry_ptr, FAIL)
}
done:
@@ -3054,14 +3586,16 @@ done:
* one page, and vfd_swmr_writer is TRUE, the write must
* buffered in the page buffer until the end of the tick.
*
- * Create a multi-page metadata entry in the page buffer
- * and copy the write into it. Insert the new entry in
- * the tick list.
+ * If it doesn't exist already, create a multi-page metadata
+ * entry in the page buffer and copy the write into it.
+ * Insert the new entry in the tick list if necessary.
*
* Test to see if the write of the multi-page metadata
* entry must be delayed. If so, place the entry in
- * the delayed write list. Otherwise, write the multi-page
- * metadata entry to the HDF5 file.
+ * the delayed write list. Otherwise, the multi-page
+ * metadata entry will be written to the HDF5 file and
+ * evicted when the tick list is released at the end of the
+ * tick.
*
* 8) If the write is of metadata, and the write is of size
* less than or equal to the page size, write the data
@@ -3154,6 +3688,8 @@ H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
* test to see if it should be, and move it from the
* LRU to the delayed write list and set the delay_write_until
* field appropriately.
+ *
+ * This is done via the call to H5PB__mark_entry_dirty()
*/
HDassert(pb_ptr->vfd_swmr_writer);
HDassert(addr == page_addr);
@@ -3177,6 +3713,11 @@ H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"can't create new page buffer page")
+
+ /* set entry_ptr->loaded to TRUE so as to trigger the
+ * delayed write test in H5PB__mark_entry_dirty().
+ */
+ entry_ptr->loaded = TRUE;
}
/* at this point, one way or the other, the multi-page metadata
@@ -3191,25 +3732,19 @@ H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDmemcpy((uint8_t *)(entry_ptr->image_ptr), buf, size);
/* mark the entry dirty */
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (1)")
-
/* insert in tick list if not there already */
if ( ! ( entry_ptr->modified_this_tick ) ) {
+ entry_ptr->modified_this_tick = TRUE;
+
H5PB__INSERT_IN_TL(pb_ptr, entry_ptr, FAIL)
}
- /* Test to see if we must delay the write of the multi-page
- * metadata entry, and move it from the LRU to the delayed write
- * list if so.
- */
-
- /* Write function for this -- assert false for now */
- HDassert(FALSE);
} else {
/* case 8) metadata write of size no larger than page size */
@@ -3267,7 +3802,7 @@ H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDmemcpy(((uint8_t *)(entry_ptr->image_ptr) + offset),
(const uint8_t *)buf, size);
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (2)")
@@ -3280,16 +3815,10 @@ H5PB__write_meta(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
*/
if ( ! ( entry_ptr->modified_this_tick ) ) {
+ entry_ptr->modified_this_tick = TRUE;
+
H5PB__INSERT_IN_TL(pb_ptr, entry_ptr, FAIL)
}
-
- /* Test to see if we must delay the write of the multi-page
- * metadata entry, and move it from the LRU to the delayed write
- * list if so.
- */
-
- /* Write function for this -- assert false for now */
- HDassert(FALSE);
}
}
@@ -3491,7 +4020,7 @@ H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDmemcpy((uint8_t *)entry_ptr->image_ptr + offset, buf,
pb_ptr->page_size - (size_t)offset);
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (1)")
@@ -3512,7 +4041,7 @@ H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
(const uint8_t *)buf + offset,
(size_t)((addr + size) - last_page_addr));
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (2)")
@@ -3574,7 +4103,7 @@ H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDmemcpy(((uint8_t *)(entry_ptr->image_ptr)) + offset,
(const uint8_t *)buf, length);
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (3)")
@@ -3610,7 +4139,7 @@ H5PB__write_raw(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
HDmemcpy((uint8_t *)(entry_ptr->image_ptr),
((const uint8_t *)(buf) + offset), length);
- if ( H5PB__mark_entry_dirty(pb_ptr, entry_ptr) < 0 )
+ if ( H5PB__mark_entry_dirty(f, pb_ptr, entry_ptr) < 0 )
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
"mark entry dirty failed (3)")
diff --git a/src/H5PBpkg.h b/src/H5PBpkg.h
index e71396a..c6d13db 100644
--- a/src/H5PBpkg.h
+++ b/src/H5PBpkg.h
@@ -69,25 +69,26 @@
#if H5PB__DO_SANITY_CHECKS
-#define H5PB__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
-if ( ( (head_ptr) == NULL ) || \
- ( (tail_ptr) == NULL ) || \
- ( (entry_ptr) == NULL ) || \
- ( (len) <= 0 ) || \
- ( (size_t)(Size) < (entry_ptr)->size ) || \
- ( ( (entry_ptr)->prev == NULL ) && ( (head_ptr) != (entry_ptr) ) ) || \
- ( ( (entry_ptr)->next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
- ( ( (len) == 1 ) && \
- ( ! ( ( (head_ptr) == (entry_ptr) ) && \
- ( (tail_ptr) == (entry_ptr) ) && \
- ( (entry_ptr)->next == NULL ) && \
- ( (entry_ptr)->prev == NULL ) && \
- ( (Size) == (int64_t)((entry_ptr)->size) ) \
- ) \
- ) \
- ) \
- ) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre remove SC failed") \
+#define H5PB__DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+if ( ( (head_ptr) == NULL ) || \
+ ( (tail_ptr) == NULL ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (len) <= 0 ) || \
+ ( (Size) < (int64_t)((entry_ptr)->size ) ) || \
+ ( ( (Size) == (int64_t)((entry_ptr)->size) ) && ( ! ( (len) == 1 ) ) ) || \
+ ( ( (entry_ptr)->prev == NULL ) && ( (head_ptr) != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
+ ( ( (len) == 1 ) && \
+ ( ! ( ( (head_ptr) == (entry_ptr) ) && \
+ ( (tail_ptr) == (entry_ptr) ) && \
+ ( (entry_ptr)->next == NULL ) && \
+ ( (entry_ptr)->prev == NULL ) && \
+ ( (Size) == (int64_t)((entry_ptr)->size) ) \
+ ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "DLL pre remove SC failed") \
}
#define H5PB__DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
@@ -107,7 +108,7 @@ if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
) \
) \
) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL sanity check failed") \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "DLL sanity check failed") \
}
#define H5PB__DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
@@ -128,7 +129,7 @@ if ( ( (entry_ptr) == NULL ) || \
) \
) \
) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "DLL pre insert SC failed") \
}
#else /* H5PB__DO_SANITY_CHECKS */
@@ -238,25 +239,146 @@ if ( ( (entry_ptr) == NULL ) || \
#if H5PB__DO_SANITY_CHECKS
+/* Sanity check evaluated before entry_ptr is removed from the doubly linked
+ * list threaded through the il_next / il_prev fields.  HGOTO_ERROR is invoked
+ * with (fv) if any of the following hold:
+ *   - hd_ptr, tail_ptr or entry_ptr is NULL, or len <= 0;
+ *   - Size is smaller than the entry's size, or equals it while len != 1;
+ *   - the entry has a NULL il_prev but is not the head, or a NULL il_next
+ *     but is not the tail;
+ *   - len == 1 but the entry is not the sole element (head and tail both
+ *     equal to it, both links NULL, and Size equal to its size).
+ * The NULL tests come first so that the later dereferences are only reached
+ * for non-NULL pointers.
+ */
+#define H5PB__IL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (hd_ptr) == NULL ) || \
+ ( (tail_ptr) == NULL ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (len) <= 0 ) || \
+ ( (Size) < (int64_t)((entry_ptr)->size) ) || \
+ ( ( (Size) == (int64_t)((entry_ptr)->size) ) && \
+ ( ! ( (len) == 1 ) ) ) || \
+ ( ( (entry_ptr)->il_prev == NULL ) && ( (hd_ptr) != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->il_next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
+ ( ( (len) == 1 ) && \
+ ( ! ( ( (hd_ptr) == (entry_ptr) ) && ( (tail_ptr) == (entry_ptr) ) && \
+ ( (entry_ptr)->il_next == NULL ) && \
+ ( (entry_ptr)->il_prev == NULL ) && \
+ ( (Size) == (int64_t)((entry_ptr)->size) ) \
+ ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "il DLL pre remove SC failed") \
+}
+/* Sanity check evaluated before entry_ptr is inserted in the doubly linked
+ * list threaded through the il_next / il_prev fields.  HGOTO_ERROR is invoked
+ * with (fv) if any of the following hold:
+ *   - entry_ptr is NULL, or already has a non-NULL il_next or il_prev;
+ *   - exactly one of hd_ptr / tail_ptr is NULL;
+ *   - len == 1 but head and tail differ, Size <= 0, the head is NULL, or
+ *     the head's size differs from Size;
+ *   - len >= 1 but the head or tail is NULL, the head has a non-NULL
+ *     il_prev, or the tail has a non-NULL il_next.
+ */
+#define H5PB__IL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
+if ( ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->il_next != NULL ) || \
+ ( (entry_ptr)->il_prev != NULL ) || \
+ ( ( ( (hd_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (hd_ptr) != (tail_ptr) ) \
+ ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (hd_ptr) != (tail_ptr) ) || ( (Size) <= 0 ) || \
+ ( (hd_ptr) == NULL ) || ( (int64_t)((hd_ptr)->size) != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (hd_ptr) == NULL ) || ( (hd_ptr)->il_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->il_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "IL DLL pre insert SC failed") \
+}
+/* General consistency check for the doubly linked list threaded through the
+ * il_next / il_prev fields.  HGOTO_ERROR is invoked with (fv) if any of the
+ * following hold:
+ *   - exactly one of head_ptr / tail_ptr is NULL;
+ *   - len == 1 but head and tail differ, the head is NULL, or the head's
+ *     size differs from Size;
+ *   - len >= 1 but the head or tail is NULL, the head has a non-NULL
+ *     il_prev, or the tail has a non-NULL il_next.
+ */
+#define H5PB__IL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
+if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
+ ( (head_ptr) != (tail_ptr) ) \
+ ) || \
+ ( ( (len) == 1 ) && \
+ ( ( (head_ptr) != (tail_ptr) ) || \
+ ( (head_ptr) == NULL ) || ( (int64_t)((head_ptr)->size) != (Size) ) \
+ ) \
+ ) || \
+ ( ( (len) >= 1 ) && \
+ ( ( (head_ptr) == NULL ) || ( (head_ptr)->il_prev != NULL ) || \
+ ( (tail_ptr) == NULL ) || ( (tail_ptr)->il_next != NULL ) \
+ ) \
+ ) \
+ ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "IL DLL sanity check failed") \
+}
+
+#else /* H5PB__DO_SANITY_CHECKS */
+
+#define H5PB__IL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+#define H5PB__IL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv)
+#define H5PB__IL_DLL_SC(head_ptr, tail_ptr, len, Size, fv)
+
+#endif /* H5PB__DO_SANITY_CHECKS */
+
+/* Append entry_ptr at the tail of the doubly linked list threaded through
+ * the il_next / il_prev fields.  If the list is empty (head_ptr is NULL) the
+ * entry becomes both head and tail; otherwise it is linked after the current
+ * tail.  len is incremented and the entry's size is added to Size.  The
+ * pre-insert sanity check runs first and the general list sanity check runs
+ * last; both expand to nothing when H5PB__DO_SANITY_CHECKS is not set.
+ */
+#define H5PB__IL_DLL_APPEND(entry_ptr, head_ptr, tail_ptr, len, Size, fail_val)\
+{ \
+ H5PB__IL_DLL_PRE_INSERT_SC(entry_ptr, head_ptr, tail_ptr, len, Size, \
+ fail_val) \
+ if ( (head_ptr) == NULL ) \
+ { \
+ (head_ptr) = (entry_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ else \
+ { \
+ (tail_ptr)->il_next = (entry_ptr); \
+ (entry_ptr)->il_prev = (tail_ptr); \
+ (tail_ptr) = (entry_ptr); \
+ } \
+ (len)++; \
+ (Size) += (int64_t)((entry_ptr)->size); \
+ H5PB__IL_DLL_SC(head_ptr, tail_ptr, len, Size, fail_val) \
+} /* H5PB__IL_DLL_APPEND() */
+
+/* Unlink entry_ptr from the doubly linked list threaded through the
+ * il_next / il_prev fields.  The head and/or tail pointers are advanced when
+ * the entry is at either end of the list; otherwise its neighbours are
+ * linked to each other.  The entry's il_next and il_prev are then reset to
+ * NULL, len is decremented and the entry's size is subtracted from Size.
+ * The pre-remove sanity check runs first and the general list sanity check
+ * runs last.
+ *
+ * Every use of the entry_ptr argument is parenthesized so that the macro
+ * expands correctly when the argument is an expression rather than a plain
+ * identifier.
+ */
+#define H5PB__IL_DLL_REMOVE(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+{ \
+ H5PB__IL_DLL_PRE_REMOVE_SC(entry_ptr, head_ptr, tail_ptr, len, Size, fv) \
+ { \
+ if ( (head_ptr) == (entry_ptr) ) \
+ { \
+ (head_ptr) = (entry_ptr)->il_next; \
+ if ( (head_ptr) != NULL ) \
+ (head_ptr)->il_prev = NULL; \
+ } \
+ else \
+ (entry_ptr)->il_prev->il_next = (entry_ptr)->il_next; \
+ if ( (tail_ptr) == (entry_ptr) ) \
+ { \
+ (tail_ptr) = (entry_ptr)->il_prev; \
+ if ( (tail_ptr) != NULL ) \
+ (tail_ptr)->il_next = NULL; \
+ } \
+ else \
+ (entry_ptr)->il_next->il_prev = (entry_ptr)->il_prev; \
+ (entry_ptr)->il_next = NULL; \
+ (entry_ptr)->il_prev = NULL; \
+ (len)--; \
+ (Size) -= (int64_t)((entry_ptr)->size); \
+ } \
+ H5PB__IL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
+} /* H5PB__IL_DLL_REMOVE() */
+
+
+#if H5PB__DO_SANITY_CHECKS
+
#define H5PB__TL_DLL_PRE_REMOVE_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
if ( ( (hd_ptr) == NULL ) || \
( (tail_ptr) == NULL ) || \
( (entry_ptr) == NULL ) || \
( (len) <= 0 ) || \
- ( (Size) < (entry_ptr)->size ) || \
- ( ( (Size) == (entry_ptr)->size ) && ( ! ( (len) == 1 ) ) ) || \
+ ( (Size) < (int64_t)((entry_ptr)->size ) ) || \
+ ( ( (Size) == (int64_t)((entry_ptr)->size) ) && ( ! ( (len) == 1 ) ) ) || \
( ( (entry_ptr)->tl_prev == NULL ) && ( (hd_ptr) != (entry_ptr) ) ) || \
( ( (entry_ptr)->tl_next == NULL ) && ( (tail_ptr) != (entry_ptr) ) ) || \
( ( (len) == 1 ) && \
( ! ( ( (hd_ptr) == (entry_ptr) ) && ( (tail_ptr) == (entry_ptr) ) && \
( (entry_ptr)->tl_next == NULL ) && \
- ( (entry_ptr)->tlx_prev == NULL ) && \
- ( (Size) == (entry_ptr)->size ) \
+ ( (entry_ptr)->tl_prev == NULL ) && \
+ ( (Size) == (int64_t)((entry_ptr)->size) ) \
) \
) \
) \
) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL pre remove SC failed") \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "TL DLL pre remove SC failed") \
}
#define H5PB__TL_DLL_SC(head_ptr, tail_ptr, len, Size, fv) \
@@ -276,7 +398,7 @@ if ( ( ( ( (head_ptr) == NULL ) || ( (tail_ptr) == NULL ) ) && \
) \
) \
) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL sanity check failed") \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "TL DLL sanity check failed") \
}
#define H5PB__TL_DLL_PRE_INSERT_SC(entry_ptr, hd_ptr, tail_ptr, len, Size, fv) \
@@ -297,7 +419,7 @@ if ( ( (entry_ptr) == NULL ) || \
) \
) \
) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, (fv), "TL DLL pre insert SC failed") \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, (fv), "TL DLL pre insert SC failed") \
}
#else /* H5PB__DO_SANITY_CHECKS */
@@ -369,7 +491,7 @@ if ( ( (entry_ptr) == NULL ) || \
entry_ptr->tl_next = NULL; \
entry_ptr->tl_prev = NULL; \
(len)--; \
- (Size) -= entry_ptr->size; \
+ (Size) -= (int64_t)(entry_ptr->size); \
} \
} /* H5PB__TL_DLL_REMOVE() */
@@ -411,16 +533,24 @@ if ( ( (entry_ptr) == NULL ) || \
((pb_ptr)->misses[ii])++; \
} /* H5PB__UPDATE_PB_HIT_RATE_STATS */
-#define H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
- if ( (pb_ptr)->index_len > (pb_ptr)->max_index_len ) \
- (pb_ptr)->max_index_len = (pb_ptr)->index_len; \
- if ( (pb_ptr)->index_size > (pb_ptr)->max_index_size ) \
- (pb_ptr)->max_index_size = (pb_ptr)->index_size; \
- if ( (pb_ptr)->curr_md_pages > (pb_ptr)->max_md_pages ) \
- (pb_ptr)->max_md_pages = (pb_ptr)->curr_md_pages; \
- if ( (pb_ptr)->curr_rd_pages > (pb_ptr)->max_rd_pages ) \
- (pb_ptr)->max_rd_pages = (pb_ptr)->curr_rd_pages; \
- if ( (pb_ptr)->mpmde_count > (pb_ptr)->max_mpmde_count ) \
+/* Record high-water marks for the page buffer index: total, clean and dirty
+ * entry counts and sizes, the metadata and raw data page counts, and the
+ * multi-page metadata entry count.  Each max_* field is raised to the
+ * matching current value whenever that value exceeds it.
+ *
+ * The final test must update max_mpmde_count; assigning max_rd_pages there
+ * (as the base version did) left max_mpmde_count permanently unchanged.
+ */
+#define H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
+ if ( (pb_ptr)->index_len > (pb_ptr)->max_index_len ) \
+ (pb_ptr)->max_index_len = (pb_ptr)->index_len; \
+ if ( (pb_ptr)->clean_index_len > (pb_ptr)->max_clean_index_len ) \
+ (pb_ptr)->max_clean_index_len = (pb_ptr)->clean_index_len; \
+ if ( (pb_ptr)->dirty_index_len > (pb_ptr)->max_dirty_index_len ) \
+ (pb_ptr)->max_dirty_index_len = (pb_ptr)->dirty_index_len; \
+ if ( (pb_ptr)->index_size > (pb_ptr)->max_index_size ) \
+ (pb_ptr)->max_index_size = (pb_ptr)->index_size; \
+ if ( (pb_ptr)->clean_index_size > (pb_ptr)->max_clean_index_size ) \
+ (pb_ptr)->max_clean_index_size = (pb_ptr)->clean_index_size; \
+ if ( (pb_ptr)->dirty_index_size > (pb_ptr)->max_dirty_index_size ) \
+ (pb_ptr)->max_dirty_index_size = (pb_ptr)->dirty_index_size; \
+ if ( (pb_ptr)->curr_md_pages > (pb_ptr)->max_md_pages ) \
+ (pb_ptr)->max_md_pages = (pb_ptr)->curr_md_pages; \
+ if ( (pb_ptr)->curr_rd_pages > (pb_ptr)->max_rd_pages ) \
+ (pb_ptr)->max_rd_pages = (pb_ptr)->curr_rd_pages; \
+ if ( (pb_ptr)->mpmde_count > (pb_ptr)->max_mpmde_count ) \
- (pb_ptr)->max_rd_pages = (pb_ptr)->curr_rd_pages;
+ (pb_ptr)->max_mpmde_count = (pb_ptr)->mpmde_count;
#define H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr) \
@@ -485,9 +615,9 @@ if ( ( (entry_ptr) == NULL ) || \
#define H5PB__UPDATE_DWL_DELAYED_WRITES(pb_ptr, insertion_depth, delay) \
{ \
HDassert((pb_ptr)->vfd_swmr_writer); \
- (pb_ptr)delayed_writes++; \
- (pb_ptr)total_delay += delay; \
- (pb_ptr)total_dwl_ins_depth += (insertion_depth) \
+ (pb_ptr)->delayed_writes++; \
+ (pb_ptr)->total_delay += (int64_t)(delay); \
+ (pb_ptr)->total_dwl_ins_depth += (insertion_depth); \
}
@@ -685,221 +815,426 @@ if ( ( (entry_ptr) == NULL ) || \
#if H5PB__DO_SANITY_CHECKS
-#define H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( (entry_ptr) == NULL ) || \
- ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
- ( (entry_ptr)->ht_next != NULL ) || \
- ( (entry_ptr)->ht_prev != NULL ) || \
- ( (entry_ptr)->size < pb_ptr->page_size ) || \
- ( H5PB__HASH_FCN((entry_ptr)->page) < 0 ) || \
- ( H5PB__HASH_FCN((entry_ptr)->page) >= H5PB__HASH_TABLE_LEN ) || \
- ( (pb_ptr)->index_len < 0 ) || \
- ( (pb_ptr)->index_size < 0 ) || \
- ( (pb_ptr)->curr_pages < 0 ) || \
- ( (pb_ptr)->curr_rd_pages < 0 ) || \
- ( (pb_ptr)->curr_md_pages < 0 ) || \
- ( ((pb_ptr)->curr_pages != \
- ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
- ( (pb_ptr)->mpmde_count < 0 ) || \
- ( (pb_ptr)->index_len != \
- ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
- HDassert(FALSE); \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "pre HT insert SC failed") \
+#define H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->ht_next != NULL ) || \
+ ( (entry_ptr)->ht_prev != NULL ) || \
+ ( (entry_ptr)->size <= 0 ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) < 0 ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) >= H5PB__HASH_TABLE_LEN ) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size ) || \
+ ( (pb_ptr)->curr_pages < 0 ) || \
+ ( (pb_ptr)->curr_rd_pages < 0 ) || \
+ ( (pb_ptr)->curr_md_pages < 0 ) || \
+ ( ((pb_ptr)->curr_pages != \
+ ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
+ ( (pb_ptr)->mpmde_count < 0 ) || \
+ ( (pb_ptr)->index_len != \
+ ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, fail_val, "pre HT insert SC failed") \
}
-#define H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
- ( (pb_ptr)->index_len < 1 ) || \
- ( (pb_ptr)->index_len != \
- ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) || \
- ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) ) { \
- HDassert(FALSE); \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "post HT insert SC failed") \
+#define H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || \
+ ( (pb_ptr)->index_len != \
+ ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) || \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, fail_val, "post HT insert SC failed") \
}
-#define H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( (pb_ptr)->index_len < 1 ) || \
- ( (entry_ptr) == NULL ) || \
- ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
- ( (entry_ptr)->size < pb_ptr->page_size ) || \
- ( (pb_ptr)->index_len < 1 ) || \
- ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
- ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
- == NULL ) || \
- ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
- != (entry_ptr) ) && \
- ( (entry_ptr)->ht_prev == NULL ) ) || \
- ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] == \
- (entry_ptr) ) && \
- ( (entry_ptr)->ht_prev != NULL ) ) ) { \
- HDassert(FALSE); \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "pre HT remove SC failed") \
+#define H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_len < 1 ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
+ ( (entry_ptr)->size <= 0 ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) < 0 ) || \
+ ( H5PB__HASH_FCN((entry_ptr)->page) >= H5PB__HASH_TABLE_LEN ) || \
+ ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
+ == NULL ) || \
+ ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] \
+ != (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev == NULL ) ) || \
+ ( ( ((pb_ptr)->ht)[(H5PB__HASH_FCN((entry_ptr)->page))] == \
+ (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev != NULL ) ) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "pre HT remove SC failed") \
}
-#define H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( (entry_ptr) == NULL ) || \
- ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
- ( (entry_ptr)->size < (pb_ptr)->page_size ) || \
- ( (entry_ptr)->ht_prev != NULL ) || \
- ( (entry_ptr)->ht_prev != NULL ) || \
- ( (pb_ptr)->index_len < 0 ) || \
- ( (pb_ptr)->index_size < 0 ) || \
- ( (pb_ptr)->curr_pages < 0 ) || \
- ( (pb_ptr)->curr_rd_pages < 0 ) || \
- ( (pb_ptr)->curr_md_pages < 0 ) || \
- ( ((pb_ptr)->curr_pages != \
- ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
- ( (pb_ptr)->mpmde_count < 0 ) || \
- ( (pb_ptr)->index_len != \
- ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
- HDassert(FALSE); \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, FAIL, "post HT remove SC failed") \
+#define H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->size <= 0 ) || \
+ ( (entry_ptr)->ht_prev != NULL ) || \
+ ( (entry_ptr)->ht_prev != NULL ) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size ) || \
+ ( (pb_ptr)->curr_pages < 0 ) || \
+ ( (pb_ptr)->curr_rd_pages < 0 ) || \
+ ( (pb_ptr)->curr_md_pages < 0 ) || \
+ ( ((pb_ptr)->curr_pages != \
+ ((pb_ptr)->curr_md_pages + (pb_ptr)->curr_rd_pages)) ) || \
+ ( (pb_ptr)->mpmde_count < 0 ) || \
+ ( (pb_ptr)->index_len != \
+ ((pb_ptr)->curr_pages + (pb_ptr)->mpmde_count) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "post HT remove SC failed") \
}
-#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, page, fail_val) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( H5PB__HASH_FCN(page) < 0 ) || \
- ( H5PB__HASH_FCN(page) >= H5PB__HASH_TABLE_LEN ) ) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, "pre HT search SC failed") \
+#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, page, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( H5PB__HASH_FCN(page) < 0 ) || \
+ ( H5PB__HASH_FCN(page) >= H5PB__HASH_TABLE_LEN ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, fail_val, "pre HT search SC failed") \
}
-#define H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val) \
-if ( ( (pb_ptr) == NULL ) || \
- ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
- ( (pb_ptr)->index_len < 1 ) || \
- ( (entry_ptr) == NULL ) || \
- ( (entry_ptr)->magic != H5PB__H5PB_ENTRY_T_MAGIC ) || \
- ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
- ( (pb_ptr)->index_len < 1 ) || \
- ( (entry_ptr)->size < (pb_ptr)->page_size ) || \
- ( ( k < 0 ) || ( k >= H5PB__HASH_TABLE_LEN ) ) || \
- ( ((pb_ptr)->ht)[k] == NULL ) || \
- ( ( ((pb_ptr)->ht)[k] != (entry_ptr) ) && \
- ( (entry_ptr)->ht_prev == NULL ) ) || \
- ( ( ((pb_ptr)->ht)[k] == (entry_ptr) ) && \
- ( (entry_ptr)->ht_prev != NULL ) ) || \
- ( ( (entry_ptr)->ht_prev != NULL ) && \
- ( (entry_ptr)->ht_prev->ht_next != (entry_ptr) ) ) || \
- ( ( (entry_ptr)->ht_next != NULL ) && \
- ( (entry_ptr)->ht_next->ht_prev != (entry_ptr) ) ) ) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \
- "post successful HT search SC failed") \
+#define H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val) \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_len < 1 ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size )) || \
+ ( (pb_ptr)->index_size != \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( (entry_ptr)->size <= 0 ) || \
+ ( ((pb_ptr)->ht)[k] == NULL ) || \
+ ( ( ((pb_ptr)->ht)[k] != (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev == NULL ) ) || \
+ ( ( ((pb_ptr)->ht)[k] == (entry_ptr) ) && \
+ ( (entry_ptr)->ht_prev != NULL ) ) || \
+ ( ( (entry_ptr)->ht_prev != NULL ) && \
+ ( (entry_ptr)->ht_prev->ht_next != (entry_ptr) ) ) || \
+ ( ( (entry_ptr)->ht_next != NULL ) && \
+ ( (entry_ptr)->ht_next->ht_prev != (entry_ptr) ) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, fail_val, \
+ "post successful HT search SC failed") \
}
#define H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val) \
if ( ( (pb_ptr) == NULL ) || \
( ((pb_ptr)->ht)[k] != (entry_ptr) ) || \
( (entry_ptr)->ht_prev != NULL ) ) { \
- HGOTO_ERROR(H5E_CACHE, H5E_SYSTEM, fail_val, \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, fail_val, \
"post HT shift to front SC failed") \
}
+#define H5PB__PRE_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr, was_clean) /* sanity check made before an index entry's size is changed; if any test below is true, HGOTO_ERROR is invoked with FAIL */ \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->index_len <= 0 ) || /* the index must not be empty */ \
+ ( (pb_ptr)->index_size <= 0 ) || \
+ ( (new_size) <= 0 ) || \
+ ( (old_size) > (pb_ptr)->index_size ) || /* old_size must fit within the current index_size */ \
+ ( ( (pb_ptr)->index_len == 1 ) && /* with a single entry, index_size must equal old_size */ \
+ ( (pb_ptr)->index_size != (old_size) ) ) || \
+ ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( ( !( was_clean ) || /* when was_clean: fails if clean_index_size < old_size */ \
+ ( (pb_ptr)->clean_index_size < (old_size) ) ) && \
+ ( ( (was_clean) ) || /* when !was_clean: fails if dirty_index_size < old_size */ \
+ ( (pb_ptr)->dirty_index_size < (old_size) ) ) ) || \
+ ( (entry_ptr) == NULL ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || /* index list length must match index_len */ \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size ) ) { /* index list size must match index_size */ \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "pre HT entry size change SC failed") \
+}
+
+#define H5PB__POST_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr) /* sanity check made after an index entry's size is changed; old_size is not referenced; if any test below is true, HGOTO_ERROR is invoked with FAIL */ \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->index_len <= 0 ) || /* the index must not be empty */ \
+ ( (pb_ptr)->index_size <= 0 ) || \
+ ( (new_size) > (pb_ptr)->index_size ) || /* new_size must fit within the updated index_size */ \
+ ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + \
+ (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) || \
+ ( ( !((entry_ptr)->is_dirty ) || /* dirty entry: fails if dirty_index_size < new_size */ \
+ ( (pb_ptr)->dirty_index_size < (new_size) ) ) && \
+ ( ( ((entry_ptr)->is_dirty) ) || /* clean entry: fails if clean_index_size < new_size */ \
+ ( (pb_ptr)->clean_index_size < (new_size) ) ) ) || \
+ ( ( (pb_ptr)->index_len == 1 ) && /* with a single entry, index_size must equal new_size */ \
+ ( (pb_ptr)->index_size != (new_size) ) ) || \
+ ( (pb_ptr)->index_len != (pb_ptr)->il_len ) || /* index list length must match index_len */ \
+ ( (pb_ptr)->index_size != (pb_ptr)->il_size ) ) { /* index list size must match index_size */ \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "post HT entry size change SC failed") \
+}
+
+#define H5PB__PRE_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr) /* sanity check made before moving an entry's bytes from the dirty to the clean index total; any true test invokes HGOTO_ERROR with FAIL */ \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_len <= 0 ) || /* the index must not be empty */ \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->is_dirty != FALSE ) || /* the entry's is_dirty flag must already be FALSE */ \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
+ ( (pb_ptr)->dirty_index_size < (int64_t)((entry_ptr)->size) ) || /* dirty total must be at least the entry size */ \
+ ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "pre HT update for entry clean SC failed") \
+}
+
+#define H5PB__PRE_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr) /* sanity check made before moving an entry's bytes from the clean to the dirty index total; any true test invokes HGOTO_ERROR with FAIL */ \
+if ( ( (pb_ptr) == NULL ) || \
+ ( (pb_ptr)->magic != H5PB__H5PB_T_MAGIC ) || \
+ ( (pb_ptr)->index_len <= 0 ) || /* the index must not be empty */ \
+ ( (entry_ptr) == NULL ) || \
+ ( (entry_ptr)->is_dirty != TRUE ) || /* the entry's is_dirty flag must already be TRUE */ \
+ ( (pb_ptr)->index_size < (int64_t)((entry_ptr)->size) ) || \
+ ( (pb_ptr)->clean_index_size < (int64_t)((entry_ptr)->size) ) || /* clean total must be at least the entry size */ \
+ ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "pre HT update for entry dirty SC failed") \
+}
+
+#define H5PB__POST_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr) /* sanity check made after an entry-clean index update; entry_ptr is not referenced; any true test invokes HGOTO_ERROR with FAIL */ \
+if ( ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "post HT update for entry clean SC failed") \
+}
+
+#define H5PB__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr) /* sanity check made after an entry-dirty index update; entry_ptr is not referenced; any true test invokes HGOTO_ERROR with FAIL */ \
+if ( ( (pb_ptr)->index_size != /* index_size must equal clean_index_size + dirty_index_size */ \
+ ((pb_ptr)->clean_index_size + (pb_ptr)->dirty_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->clean_index_size) ) || \
+ ( (pb_ptr)->index_size < ((pb_ptr)->dirty_index_size) ) ) { \
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, \
+ "post HT update for entry dirty SC failed") \
+}
+
#else /* H5PB__DO_SANITY_CHECKS */
#define H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val)
#define H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val)
#define H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr)
#define H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr)
-#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, page, fail_val)
+#define H5PB__PRE_HT_SEARCH_SC(pb_ptr, Addr, fail_val)
#define H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val)
#define H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val)
+#define H5PB__PRE_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr)
+#define H5PB__PRE_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr)
+#define H5PB__PRE_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr, was_clean)
+#define H5PB__POST_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr)
+#define H5PB__POST_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr)
+#define H5PB__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr)
#endif /* H5PB__DO_SANITY_CHECKS */
-#define H5PB__INSERT_IN_INDEX(pb_ptr, entry_ptr, fail_val) \
+
+#define H5PB__INSERT_IN_INDEX(pb_ptr, entry_ptr, fail_val) \
+{ /* add entry_ptr to the hash table and the index list, and update the index counters */ \
+ int k; /* hash bucket selected by the entry's page */ \
+ H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+ k = H5PB__HASH_FCN((entry_ptr)->page); \
+ if(((pb_ptr)->ht)[k] != NULL) { /* bucket occupied: link the entry in ahead of the current head */ \
+ (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr); \
+ } \
+ ((pb_ptr)->ht)[k] = (entry_ptr); /* the entry becomes the bucket head */ \
+ (pb_ptr)->index_len++; \
+ (pb_ptr)->index_size += (int64_t)((entry_ptr)->size); \
+ if((entry_ptr)->is_dirty) { /* add the entry's size to the matching clean/dirty total */ \
+ (pb_ptr)->dirty_index_size += (int64_t)((entry_ptr)->size); \
+ } else { \
+ (pb_ptr)->clean_index_size += (int64_t)((entry_ptr)->size); \
+ } \
+ if ( (entry_ptr)->is_metadata ) { \
+ if ( (entry_ptr)->is_mpmde ) { /* is_mpmde entries bump mpmde_count only, not the page counts */ \
+ ((pb_ptr)->mpmde_count)++; \
+ } else { \
+ ((pb_ptr)->curr_md_pages)++; \
+ (pb_ptr)->curr_pages++; \
+ } \
+ } else { /* not metadata: counted as a raw data page */ \
+ ((pb_ptr)->curr_rd_pages)++; \
+ (pb_ptr)->curr_pages++; \
+ } \
+ H5PB__IL_DLL_APPEND((entry_ptr), (pb_ptr)->il_head, \
+ (pb_ptr)->il_tail, (pb_ptr)->il_len, \
+ (pb_ptr)->il_size, fail_val) \
+ H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr) \
+ H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
+ H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+}
+
+#define H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, fail_val) \
+{ /* remove entry_ptr from the hash table and the index list, and update the index counters */ \
+ int k; /* hash bucket selected by the entry's page */ \
+ H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+ k = H5PB__HASH_FCN((entry_ptr)->page); \
+ if((entry_ptr)->ht_next) /* unlink from the bucket's doubly linked list */ \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
+ if((entry_ptr)->ht_prev) \
+ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
+ if(((pb_ptr)->ht)[k] == (entry_ptr)) /* entry was the bucket head: its successor takes over */ \
+ ((pb_ptr)->ht)[k] = (entry_ptr)->ht_next; \
+ (entry_ptr)->ht_next = NULL; \
+ (entry_ptr)->ht_prev = NULL; \
+ (pb_ptr)->index_len--; \
+ (pb_ptr)->index_size -= (int64_t)((entry_ptr)->size); \
+ if((entry_ptr)->is_dirty) { /* subtract the entry's size from the matching clean/dirty total */ \
+ (pb_ptr)->dirty_index_size -= (int64_t)((entry_ptr)->size); \
+ } else { \
+ (pb_ptr)->clean_index_size -= (int64_t)((entry_ptr)->size); \
+ } \
+ if ( (entry_ptr)->is_metadata ) { \
+ if ( (entry_ptr)->is_mpmde ) { /* is_mpmde entries affect mpmde_count only, not the page counts */ \
+ ((pb_ptr)->mpmde_count)--; \
+ } else { \
+ ((pb_ptr)->curr_md_pages)--; \
+ (pb_ptr)->curr_pages--; \
+ } \
+ } else { /* not metadata: counted as a raw data page */ \
+ ((pb_ptr)->curr_rd_pages)--; \
+ (pb_ptr)->curr_pages--; \
+ } \
+ H5PB__IL_DLL_REMOVE((entry_ptr), (pb_ptr)->il_head, \
+ (pb_ptr)->il_tail, (pb_ptr)->il_len, \
+ (pb_ptr)->il_size, fail_val) \
+ H5PB__UPDATE_STATS_FOR_HT_DELETION(pb_ptr) \
+ H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+}
+
+#define H5PB__SEARCH_INDEX(pb_ptr, Page, entry_ptr, fail_val) \
+{ /* look up Page in the hash table; entry_ptr is set to the matching entry, or is NULL when the bucket chain is exhausted */ \
+ int k; /* hash bucket selected by Page */ \
+ int depth = 0; /* number of non-matching entries stepped over */ \
+ H5PB__PRE_HT_SEARCH_SC(pb_ptr, Page, fail_val) \
+ k = H5PB__HASH_FCN(Page); \
+ entry_ptr = ((pb_ptr)->ht)[k]; \
+ while(entry_ptr) { \
+ if ( (Page) == (entry_ptr)->page ) { \
+ H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val) \
+ if ( (entry_ptr) != ((pb_ptr)->ht)[k] ) { /* hit is not the bucket head: move it to the front */ \
+ if ( (entry_ptr)->ht_next ) \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
+ HDassert((entry_ptr)->ht_prev != NULL); \
+ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
+ ((pb_ptr)->ht)[k]->ht_prev = (entry_ptr); \
+ (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
+ (entry_ptr)->ht_prev = NULL; \
+ ((pb_ptr)->ht)[k] = (entry_ptr); \
+ H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val)\
+ } \
+ break; \
+ } \
+ (entry_ptr) = (entry_ptr)->ht_next; \
+ (depth)++; \
+ } \
+ H5PB__UPDATE_STATS_FOR_HT_SEARCH(pb_ptr, (entry_ptr != NULL), depth) \
+}
+
+#define H5PB__SEARCH_INDEX_NO_STATS(pb_ptr, Page, entry_ptr, fail_val) \
+{ /* look up Page in the hash table without updating search statistics; entry_ptr is set to the matching entry, or is NULL when the bucket chain is exhausted */ \
+ int k; /* hash bucket selected by Page */ \
+ H5PB__PRE_HT_SEARCH_SC(pb_ptr, Page, fail_val) \
+ k = H5PB__HASH_FCN(Page); \
+ entry_ptr = ((pb_ptr)->ht)[k]; \
+ while(entry_ptr) { \
+ if ( (Page) == (entry_ptr)->page ) { /* page number match, same test as H5PB__SEARCH_INDEX */ \
+ H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, fail_val) \
+ if ( entry_ptr != ((pb_ptr)->ht)[k] ) { /* hit is not the bucket head: move it to the front */ \
+ if( (entry_ptr)->ht_next ) \
+ (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
+ HDassert((entry_ptr)->ht_prev != NULL); \
+ (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
+ ((pb_ptr)->ht)[k]->ht_prev = (entry_ptr); \
+ (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
+ (entry_ptr)->ht_prev = NULL; \
+ ((pb_ptr)->ht)[k] = (entry_ptr); \
+ H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, fail_val)\
+ } \
+ break; \
+ } \
+ (entry_ptr) = (entry_ptr)->ht_next; \
+ } \
+}
+
+#define H5PB__UPDATE_INDEX_FOR_ENTRY_CLEAN(pb_ptr, entry_ptr) \
{ \
- int k; \
- H5PB__PRE_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
- k = H5PB__HASH_FCN((entry_ptr)->page); \
- if(((pb_ptr)->ht)[k] != NULL) { \
- (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
- (entry_ptr)->ht_next->ht_prev = (entry_ptr); \
- } \
- ((pb_ptr)->ht)[k] = (entry_ptr); \
- (pb_ptr)->index_len++; \
- (pb_ptr)->index_size += (int64_t)((entry_ptr)->size); \
- if ( (entry_ptr)->is_metadata ) { \
- if ( (entry_ptr)->is_mpmde ) { \
- ((pb_ptr)->mpmde_count)++; \
- } else { \
- ((pb_ptr)->curr_md_pages)++; \
- (pb_ptr)->curr_pages++; \
- } \
- } else { \
- ((pb_ptr)->curr_rd_pages)++; \
- (pb_ptr)->curr_pages++; \
- } \
- H5PB__UPDATE_STATS_FOR_HT_INSERTION(pb_ptr) \
- H5PB__UPDATE_HT_SIZE_STATS(pb_ptr) \
- H5PB__POST_HT_INSERT_SC(pb_ptr, entry_ptr, fail_val) \
+ H5PB__PRE_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr); \
+ (pb_ptr)->dirty_index_size -= (int64_t)((entry_ptr)->size); \
+ (pb_ptr)->clean_index_size += (int64_t)((entry_ptr)->size); \
+ H5PB__POST_HT_UPDATE_FOR_ENTRY_CLEAN_SC(pb_ptr, entry_ptr); \
}
-#define H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, fail_val) \
+#define H5PB__UPDATE_INDEX_FOR_ENTRY_DIRTY(pb_ptr, entry_ptr) \
{ \
- int k; \
- H5PB__PRE_HT_REMOVE_SC(pb_ptr, entry_ptr) \
- k = H5PB__HASH_FCN((entry_ptr)->page); \
- if((entry_ptr)->ht_next) \
- (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
- if((entry_ptr)->ht_prev) \
- (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
- if(((pb_ptr)->ht)[k] == (entry_ptr)) \
- ((pb_ptr)->ht)[k] = (entry_ptr)->ht_next; \
- (entry_ptr)->ht_next = NULL; \
- (entry_ptr)->ht_prev = NULL; \
- (pb_ptr)->index_len--; \
- (pb_ptr)->index_size -= (int64_t)((entry_ptr)->size); \
- if ( (entry_ptr)->is_metadata ) { \
- if ( (entry_ptr)->is_mpmde ) { \
- ((pb_ptr)->mpmde_count)--; \
- } else { \
- ((pb_ptr)->curr_md_pages)--; \
- (pb_ptr)->curr_pages--; \
- } \
- } else { \
- ((pb_ptr)->curr_rd_pages)--; \
- (pb_ptr)->curr_pages--; \
- } \
- H5PB__UPDATE_STATS_FOR_HT_DELETION(pb_ptr) \
- H5PB__POST_HT_REMOVE_SC(pb_ptr, entry_ptr) \
+ H5PB__PRE_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr); \
+ (pb_ptr)->clean_index_size -= (int64_t)((entry_ptr)->size); \
+ (pb_ptr)->dirty_index_size += (int64_t)((entry_ptr)->size); \
+ H5PB__POST_HT_UPDATE_FOR_ENTRY_DIRTY_SC(pb_ptr, entry_ptr); \
}
-#define H5PB__SEARCH_INDEX(pb_ptr, pg, entry_ptr, f_val) \
-{ \
- int k; \
- int depth = 0; \
- H5PB__PRE_HT_SEARCH_SC((pb_ptr), (pg), (f_val)) \
- k = H5PB__HASH_FCN((pg)); \
- entry_ptr = ((pb_ptr)->ht)[k]; \
- while ( entry_ptr ) { \
- if ( (pg) == (entry_ptr)->page ) { \
- H5PB__POST_SUC_HT_SEARCH_SC(pb_ptr, entry_ptr, k, f_val) \
- if ( entry_ptr != ((pb_ptr)->ht)[k] ) { \
- if ( (entry_ptr)->ht_next ) \
- (entry_ptr)->ht_next->ht_prev = (entry_ptr)->ht_prev; \
- HDassert((entry_ptr)->ht_prev != NULL); \
- (entry_ptr)->ht_prev->ht_next = (entry_ptr)->ht_next; \
- ((pb_ptr)->ht)[k]->ht_prev = (entry_ptr); \
- (entry_ptr)->ht_next = ((pb_ptr)->ht)[k]; \
- (entry_ptr)->ht_prev = NULL; \
- ((pb_ptr)->ht)[k] = (entry_ptr); \
- H5PB__POST_HT_SHIFT_TO_FRONT_SC(pb_ptr, entry_ptr, k, f_val) \
- } \
- break; \
- } \
- (entry_ptr) = (entry_ptr)->ht_next; \
- (depth)++; \
- } \
- H5PB__UPDATE_STATS_FOR_HT_SEARCH(pb_ptr, (entry_ptr != NULL), depth) \
+#define H5PB__UPDATE_INDEX_FOR_SIZE_CHANGE(pb_ptr, old_size, new_size, \
+ entry_ptr, was_clean) \
+{ \
+ H5PB__PRE_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr, was_clean) \
+ (pb_ptr)->index_size -= (old_size); \
+ (pb_ptr)->index_size += (new_size); \
+ if(was_clean) { \
+ (pb_ptr)->clean_index_size -= (old_size); \
+ } else { \
+ (pb_ptr)->dirty_index_size -= (old_size); \
+ } \
+ if((entry_ptr)->is_dirty) { \
+ (pb_ptr)->dirty_index_size += (new_size); \
+ } else { \
+ (pb_ptr)->clean_index_size += (new_size); \
+ } \
+ H5PB__DLL_UPDATE_FOR_SIZE_CHANGE((pb_ptr)->il_len, \
+ (pb_ptr)->il_size, \
+ (old_size), (new_size)) \
+ H5PB__POST_HT_ENTRY_SIZE_CHANGE_SC(pb_ptr, old_size, new_size, \
+ entry_ptr) \
}
@@ -1072,7 +1407,7 @@ if ( ( (pb_ptr) == NULL ) || \
HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
HDassert( (entry_ptr) ); \
HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
- HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
+ HDassert( (entry_ptr)->size == pb_ptr->page_size ); \
\
/* modified LRU specific code */ \
\
@@ -1217,7 +1552,7 @@ if ( ( (pb_ptr) == NULL ) || \
{ \
HDassert( (pb_ptr) ); \
HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
- HDassert( (pb_ptr)->vfd_swmr_writer ) \
+ HDassert( (pb_ptr)->vfd_swmr_writer ); \
HDassert( (entry_ptr) ); \
HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
HDassert( (entry_ptr)->modified_this_tick ); \
@@ -1295,7 +1630,7 @@ if ( ( (pb_ptr) == NULL ) || \
\
HDassert( (pb_ptr) ); \
HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
- HDassert( (pb_ptr)->vfd_swmr_writer ) \
+ HDassert( (pb_ptr)->vfd_swmr_writer ); \
HDassert( (entry_ptr) ); \
HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
@@ -1313,7 +1648,7 @@ if ( ( (pb_ptr) == NULL ) || \
\
H5PB__DLL_INSERT_BEFORE((entry_ptr), (suc_ptr), (pb_ptr)->dwl_head_ptr, \
(pb_ptr)->dwl_tail_ptr, (pb_ptr)->dwl_len, \
- (pb_ptr)->dwl_size), (fail_val)) \
+ (pb_ptr)->dwl_size, (fail_val)) \
\
if ( entry_ptr->delay_write_until > pb_ptr->max_delay ) \
pb_ptr->max_delay = entry_ptr->delay_write_until; \
@@ -1346,7 +1681,7 @@ if ( ( (pb_ptr) == NULL ) || \
{ \
HDassert( (pb_ptr) ); \
HDassert( (pb_ptr)->magic == H5PB__H5PB_T_MAGIC ); \
- HDassert( (pb_ptr)->vfd_swmr_writer ) \
+ HDassert( (pb_ptr)->vfd_swmr_writer ); \
HDassert( (entry_ptr) ); \
HDassert( (entry_ptr)->magic == H5PB__H5PB_ENTRY_T_MAGIC ); \
HDassert( (entry_ptr)->size >= pb_ptr->page_size ); \
@@ -1421,6 +1756,9 @@ if ( ( (pb_ptr) == NULL ) || \
* If there are multiple entries in any hash bin, they are stored in a doubly
* linked list.
*
+ * To facilitate flushing the page buffer, we also maintain a doubly linked
+ * list of all entries in the page buffer.
+ *
* ht_next: Next pointer used by the hash table to store multiple
* entries in a single hash bin. This field points to the
* next entry in the doubly linked list of entries in the
@@ -1431,6 +1769,16 @@ if ( ( (pb_ptr) == NULL ) || \
* previous entry in the doubly linked list of entries in
* the hash bin, or NULL if there is no previous entry.
*
+ * il_next: Next pointer used by the index to maintain a doubly linked
+ * list of all entries in the index (and thus in the page buffer).
+ * This field contains a pointer to the next entry in the
+ * index list, or NULL if there is no next entry.
+ *
+ * il_prev: Prev pointer used by the index to maintain a doubly linked
+ * list of all entries in the index (and thus in the page buffer).
+ * This field contains a pointer to the previous entry in the
+ * index list, or NULL if there is no previous entry.
+ *
*
* Fields supporting replacement policies:
*
@@ -1487,13 +1835,12 @@ if ( ( (pb_ptr) == NULL ) || \
*
****************************************************************************/
-
#define H5PB__H5PB_ENTRY_T_MAGIC 0x02030405
struct H5PB_entry_t {
uint32_t magic;
- H5PB_t *pb_ptr;
+ H5PB_t *pb_ptr;
haddr_t addr;
uint64_t page;
size_t size;
@@ -1505,6 +1852,8 @@ struct H5PB_entry_t {
/* fields supporting the hash table: */
struct H5PB_entry_t *ht_next;
struct H5PB_entry_t *ht_prev;
+ struct H5PB_entry_t *il_next;
+ struct H5PB_entry_t *il_prev;
/* fields supporting replacement policies: */
struct H5PB_entry_t *next;
diff --git a/src/H5PBprivate.h b/src/H5PBprivate.h
index 2c1f3cb..7aabcd5 100644
--- a/src/H5PBprivate.h
+++ b/src/H5PBprivate.h
@@ -140,22 +140,87 @@ typedef struct H5PB_entry_t H5PB_entry_t;
* hash to the same bucket. That said, we must collect statistics to alert
* us should this not be the case.
*
+ * We also maintain a linked list of all entries in the index to facilitate
+ * flush operations.
+ *
* index Array of pointers to H5PB_entry_t of size
* H5PB__HASH_TABLE_LEN. This size must be a power of 2,
* not the usual prime number.
*
* index_len: Number of entries currently in the hash table used to index
- * the page buffer.
+ * the page buffer. index_len should always equal
+ * clean_index_len + dirty_index_len.
+ *
+ * clean_index_len: Number of clean entries currently in the hash table
+ * used to index the page buffer.
+ *
+ * dirty_index_len: Number of dirty entries currently in the hash table
+ * used to index the page buffer.
*
* index_size: Number of bytes currently stored in the hash table used to
* index the page buffer. Under normal circumstances, this
* value will be index_len * page size. However, if
* vfd_swmr_writer is TRUE, it may be larger.
*
+ * index_size should always equal clean_index_size +
+ * dirty_index_size.
+ *
+ * clean_index_size: Number of bytes of clean entries currently stored in
+ * the hash table used to index the page buffer.
+ *
+ * dirty_index_size: Number of bytes of dirty entries currently stored in
+ * the hash table used to index the page buffer.
+ *
+ * il_len: Number of entries on the index list.
+ *
+ * This must always be equal to index_len. As such, this
+ * field is redundant. However, the existing linked list
+ * management macros expect to maintain a length field, so
+ * this field exists primarily to avoid adding complexity to
+ * these macros.
+ *
+ * il_size: Number of bytes of cache entries currently stored in the
+ * index list.
+ *
+ * This must always be equal to index_size. As such, this
+ * field is redundant. However, the existing linked list
+ * management macros expect to maintain a size field, so
+ * this field exists primarily to avoid adding complexity to
+ * these macros.
+ *
+ * il_head: Pointer to the head of the doubly linked list of entries in
+ * the index list. Note that cache entries on this list are
+ * linked by their il_next and il_prev fields.
+ *
+ * This field is NULL if the index is empty.
+ *
+ * il_tail: Pointer to the tail of the doubly linked list of entries in
+ * the index list. Note that cache entries on this list are
+ * linked by their il_next and il_prev fields.
+ *
+ * This field is NULL if the index is empty.
+
+ *
+ *
* Fields supporting the modified LRU policy:
*
* See most any OS text for a discussion of the LRU replacement policy.
*
+ * Under normal operating circumstances (i.e. vfd_swmr_writer is FALSE)
+ * all entries will reside both in the index and in the LRU. Further,
+ * all entries will be of size page_size.
+ *
+ * The VFD SWMR writer case (i.e. vfd_swmr_writer is TRUE) is complicated
+ * by the requirements that we:
+ *
+ * 1) buffer all metadata writes (including multi-page metadata writes) that
+ * occur during a tick, and
+ *
+ * 2) when necessary, delay metadata writes for up to max_lag ticks to
+ * avoid message from the future bugs on the VFD SWMR readers.
+ *
+ * See discussion of fields supporting VFD SWMR below for details.
+ *
* Discussions of the individual fields used by the modified LRU replacement
* policy follow:
*
@@ -183,7 +248,43 @@ typedef struct H5PB_entry_t H5PB_entry_t;
* This field is NULL if the list is empty.
*
*
- * FIELDS FOR VFD SWMR:
+ * FIELDS SUPPORTING VFD SWMR:
+ *
+ * If the file is opened as a VFD SWMR writer (i.e. vfd_swmr_writer == TRUE),
+ * the page buffer must retain the data necessary to update the metadata
+ * file at the end of each tick, and also delay writes as necessary so as
+ * to avoid message from the future bugs on the VFD SWMR readers.
+ *
+ * The tick list exists to allow us to buffer copies of all metadata writes
+ * during a tick, and the delayed write list supports delayed writes.
+ *
+ * If a regular page is written to during a tick, it is placed on the tick
+ * list. If there is no reason to delay its write to file (i.e. either
+ * it was just allocated, or it has existed in the metadata file index for
+ * at least max_lag ticks), it is also placed on the LRU, where it may be
+ * flushed, but not evicted. If its write must be delayed, it is placed on
+ * the delayed write list, where it must remain until its write delay is
+ * satisfied -- at which point it is moved to the LRU.
+ *
+ * If a multi-page metadata entry is written during a tick, it is placed on
+ * the tick list. If, in addition, the write of the entry must be delayed,
+ * it is also placed on the delayed write list. Note that multi-page metadata
+ * entries may never appear on the LRU.
+ *
+ * At the end of each tick, the tick list is emptied.
+ *
+ * Regular pages are simply removed from the tick list, as they must already
+ * appear on either the LRU or the delayed write list.
+ *
+ * Multi-page metadata entries that are not also on the delayed write list
+ * are simply flushed and evicted.
+ *
+ * The delayed write list is also scanned at the end of each tick. Regular
+ * entries that are now flushable are placed at the head of the LRU. Multi-
+ * page metadata entries that are flushable are flushed and evicted.
+ *
+ * The remainder of this section contains discussions of the fields and
+ * data structures used to support the above operations.
*
* vfd_swmr_writer: Boolean flag that is set to TRUE iff the file is
* opened in VFD SWMR mode. The remaining
@@ -205,8 +306,8 @@ typedef struct H5PB_entry_t H5PB_entry_t;
* likely be perceived as file corruption by the reader.
*
* To facilitate identification of entries that must be removed from the
- * DWL, the list always observes the following invarient for any entry
- * on the list:
+ * DWL during the end of tick scan, the list always observes the following
+ * invariant for any entry on the list:
*
* entry_ptr->next == NULL ||
* entry_ptr->delay_write_until >= entry_ptr->next->delay_write_until
@@ -384,8 +485,16 @@ typedef struct H5PB_entry_t H5PB_entry_t;
*
* max_index_len: Largest value attained by the index_len field.
*
+ * max_clean_index_len: Largest value attained by the clean_index_len field.
+ *
+ * max_dirty_index_len: Largest value attained by the dirty_index_len field.
+ *
* max_index_size: Largest value attained by the index_size field.
*
+ * max_clean_index_size: Largest value attained by the clean_index_size field.
+ *
+ * max_dirty_index_size: Largest value attained by the dirty_index_size field.
+ *
* max_rd_pages: Maximum number of raw data pages in the page buffer.
*
* max_md_pages: Maximum number of metadata pages in the page buffer.
@@ -459,7 +568,15 @@ typedef struct H5PB_t {
/* index */
H5PB_entry_t *(ht[H5PB__HASH_TABLE_LEN]);
int64_t index_len;
+ int64_t clean_index_len;
+ int64_t dirty_index_len;
int64_t index_size;
+ int64_t clean_index_size;
+ int64_t dirty_index_size;
+ int64_t il_len;
+ int64_t il_size;
+ H5PB_entry_t * il_head;
+ H5PB_entry_t * il_tail;
/* LRU */
int64_t LRU_len;
@@ -518,7 +635,11 @@ typedef struct H5PB_t {
int64_t failed_ht_searches;
int64_t total_failed_ht_search_depth;
int64_t max_index_len;
+ int64_t max_clean_index_len;
+ int64_t max_dirty_index_len;
int64_t max_index_size;
+ int64_t max_clean_index_size;
+ int64_t max_dirty_index_size;
int64_t max_rd_pages;
int64_t max_md_pages;
@@ -567,6 +688,17 @@ H5_DLL herr_t H5PB_read(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
H5_DLL herr_t H5PB_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size,
const void *buf);
+
+/* VFD SWMR specific routines */
+H5_DLL herr_t H5PB_vfd_swmr__release_delayed_writes(H5F_t * f);
+
+H5_DLL herr_t H5PB_vfd_swmr__release_tick_list(H5F_t * f);
+
+H5_DLL herr_t H5PB_vfd_swmr__update_index(H5F_t * f, int * idx_ent_added_ptr,
+ int * idx_ent_modified_ptr, int * idx_ent_not_in_tl_ptr,
+ int * idx_ent_not_in_tl_flushed_ptr);
+
+
/* Statistics routines */
H5_DLL herr_t H5PB_reset_stats(H5PB_t *page_buf);
@@ -576,6 +708,7 @@ H5_DLL herr_t H5PB_get_stats(const H5PB_t *page_buf, unsigned accesses[2],
H5_DLL herr_t H5PB_print_stats(const H5PB_t *page_buf);
+
/* test & debug functions */
H5_DLL herr_t H5PB_page_exists(H5F_t *f, haddr_t addr,
hbool_t *page_exists_ptr);