summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5FDprivate.h2
-rw-r--r--src/H5Fint.c4
-rw-r--r--src/H5Fpkg.h23
-rw-r--r--src/H5Ftest.c19
-rw-r--r--src/H5Fvfd_swmr.c223
-rw-r--r--src/H5PB.c67
6 files changed, 195 insertions, 143 deletions
diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h
index c6a53d6..9df0c13 100644
--- a/src/H5FDprivate.h
+++ b/src/H5FDprivate.h
@@ -347,6 +347,8 @@ H5_DLL herr_t H5FD_set_paged_aggr(H5FD_t *file, hbool_t paged);
H5_DLL herr_t H5FD_get_driver_name(const H5FD_t *file, char **driver_name);
/* Function prototypes for VFD SWMR */
+H5_DLL int vfd_swmr_idx_entry_defer_free(struct H5F_shared_t *,
+ const H5FD_vfd_swmr_idx_entry_t *);
H5_DLL herr_t H5FD_vfd_swmr_get_tick_and_idx(H5FD_t *_file, hbool_t read_index,
uint64_t *tick_ptr, uint32_t *num_entries_ptr,
H5FD_vfd_swmr_idx_entry_t index[]);
diff --git a/src/H5Fint.c b/src/H5Fint.c
index 282a9de..661ec40 100644
--- a/src/H5Fint.c
+++ b/src/H5Fint.c
@@ -1117,9 +1117,7 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
f->shared->vfd_swmr_md_fd = -1;
f->shared->fs_man_md = NULL;
- f->shared->dl_head_ptr = NULL;
- f->shared->dl_tail_ptr = NULL;
- f->shared->dl_len = 0;
+ TAILQ_INIT(&f->shared->old_images);
/* Get the VOL connector info */
if(H5F__set_vol_conn(f) < 0)
diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h
index 8acd919..4dfcdc2 100644
--- a/src/H5Fpkg.h
+++ b/src/H5Fpkg.h
@@ -225,17 +225,15 @@ typedef struct H5F_mtab_t {
* length: The length of the metadata page or multi page
* metadata entry in BYTES.
* tick_num: Sequence # of the current tick
- * next: Pointer to the next entry in the the list
- * prev: Pointer to the previous entry in the list
+ * link: tailqueue linkage
*/
-typedef struct H5F_vfd_swmr_dl_entry_t {
+typedef struct old_image {
uint64_t hdf5_page_offset;
uint64_t md_file_page_offset;
uint32_t length;
uint64_t tick_num;
- struct H5F_vfd_swmr_dl_entry_t *next;
- struct H5F_vfd_swmr_dl_entry_t *prev;
-} H5F_vfd_swmr_dl_entry_t;
+ TAILQ_ENTRY(old_image) link;
+} old_image_t;
/* Structure specifically to store superblock. This was originally
* maintained entirely within H5F_shared_t, but is now extracted
@@ -266,7 +264,9 @@ typedef struct deferred_free {
uint64_t free_after_tick;
} deferred_free_t;
-typedef SIMPLEQ_HEAD(deferred_free_head, deferred_free) deferred_free_head_t;
+typedef SIMPLEQ_HEAD(deferred_free_queue, deferred_free) deferred_free_queue_t;
+
+typedef TAILQ_HEAD(old_image_queue, old_image) old_image_queue_t;
/*
* Define the structure to store the file information for HDF5 files. One of
@@ -410,7 +410,7 @@ struct H5F_shared_t {
*/
uint64_t tick_num; /* Number of the current tick */
struct timespec end_of_tick; /* End time of the current tick */
- deferred_free_head_t deferred_frees; /* For use by VFD SWMR writers. */
+ deferred_free_queue_t deferred_frees; /* For use by VFD SWMR writers. */
/* VFD SWMR metadata file index */
H5FD_vfd_swmr_idx_entry_t * mdf_idx; /* pointer to an array of instance
* of H5FD_vfd_swmr_idx_entry_t of
@@ -467,12 +467,7 @@ struct H5F_shared_t {
*/
/* Delayed free space release doubly linked list */
- uint32_t dl_len; /* # of entries in the list */
- H5F_vfd_swmr_dl_entry_t *dl_head_ptr; /* Points to the beginning of
- * the list
- */
- H5F_vfd_swmr_dl_entry_t *dl_tail_ptr; /* Points to the end of the list */
-
+ old_image_queue_t old_images;
char *extpath; /* Path for searching target external link file */
#ifdef H5_HAVE_PARALLEL
diff --git a/src/H5Ftest.c b/src/H5Ftest.c
index cebbbe9..90657ef 100644
--- a/src/H5Ftest.c
+++ b/src/H5Ftest.c
@@ -526,6 +526,17 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* H5F__vfd_swmr_verify_md_hdr_and_idx() */
+/* Walk the old-image queue from head to tail and return how many
+ * entries are linked on it.
+ */
+static unsigned
+count_old_images(old_image_queue_t *old_images)
+{
+    unsigned nimages = 0;
+    old_image_t *img;
+
+    for (img = TAILQ_FIRST(old_images); img != NULL;
+         img = TAILQ_NEXT(img, link)) {
+        nimages++;
+    }
+
+    return nimages;
+}
/*-------------------------------------------------------------------------
@@ -536,15 +547,15 @@ done:
* --info read from the metadata file is as indicated by
* the input: num_entries, index
* --# of entries on the delayed list is as indicated by
- * the input: num_dl_entries
+ * the input: nold_images
*
* Return: SUCCEED/FAIL
*
*-------------------------------------------------------------------------
*/
herr_t
-H5F__vfd_swmr_writer_md_test(hid_t file_id, unsigned num_entries, H5FD_vfd_swmr_idx_entry_t *index,
- unsigned num_dl_entries)
+H5F__vfd_swmr_writer_md_test(hid_t file_id, unsigned num_entries,
+ H5FD_vfd_swmr_idx_entry_t *index, unsigned nold_images)
{
H5F_t *f; /* File pointer */
int md_fd = -1; /* The metadata file descriptor */
@@ -566,7 +577,7 @@ H5F__vfd_swmr_writer_md_test(hid_t file_id, unsigned num_entries, H5FD_vfd_swmr_
HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, "error updating the md file with the index")
/* Verify the number of entries in the delayed list is as expected */
- if(f->shared->dl_len < num_dl_entries)
+ if(count_old_images(&f->shared->old_images) < nold_images)
HGOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "incorrect # of entries in the delayed list")
/* Open the metadata file */
diff --git a/src/H5Fvfd_swmr.c b/src/H5Fvfd_swmr.c
index 72c2aec..11187b1 100644
--- a/src/H5Fvfd_swmr.c
+++ b/src/H5Fvfd_swmr.c
@@ -57,38 +57,6 @@
#define nanosecs_per_second 1000000000 /* nanoseconds per second */
#define nanosecs_per_tenth_sec 100000000 /* nanoseconds per 0.1 second */
-/* Remove an entry from the doubly linked list */
-#define H5F__LL_REMOVE(entry_ptr, head_ptr, tail_ptr) \
-{ \
- if((head_ptr) == (entry_ptr)) { \
- (head_ptr) = (entry_ptr)->next; \
- if((head_ptr) != NULL ) \
- (head_ptr)->prev = NULL; \
- } else \
- (entry_ptr)->prev->next = (entry_ptr)->next; \
- if((tail_ptr) == (entry_ptr)) { \
- (tail_ptr) = (entry_ptr)->prev; \
- if((tail_ptr) != NULL) \
- (tail_ptr)->next = NULL; \
- } else \
- (entry_ptr)->next->prev = (entry_ptr)->prev; \
- entry_ptr->next = NULL; \
- entry_ptr->prev = NULL; \
-} /* H5F__LL_REMOVE() */
-
-/* Prepend an entry to the doubly linked list */
-#define H5F__LL_PREPEND(entry_ptr, head_ptr, tail_ptr) \
-{ \
- if((head_ptr) == NULL) { \
- (head_ptr) = (entry_ptr); \
- (tail_ptr) = (entry_ptr); \
- } else { \
- (head_ptr)->prev = (entry_ptr); \
- (entry_ptr)->next = (head_ptr); \
- (head_ptr) = (entry_ptr); \
- } \
-} /* H5F__LL_PREPEND() */
-
/********************/
/* Local Prototypes */
/********************/
@@ -118,6 +86,23 @@ unsigned int vfd_swmr_api_entries_g = 0;/* Times the library was entered
* on the 0->1 and 1->0
* transitions.
*/
+static const bool ldbg_enabled = false;
+
+/*
+ * Print a printf(3)-style debug message on standard output when
+ * ldbg_enabled is true; otherwise do nothing.
+ *
+ * The enabled check is made before va_start() so that every va_start()
+ * is matched by a va_end() on all paths through the function, as the
+ * C standard requires.
+ */
+static void
+ldbgf(const char *fmt, ...)
+{
+    va_list ap;
+
+    if (!ldbg_enabled)
+        return;
+
+    va_start(ap, fmt);
+    (void)vprintf(fmt, ap);
+    va_end(ap);
+}
+
/*
* The head of the end of tick queue (EOT queue) for files opened in either
* VFD SWMR write or VFD SWMR read mode
@@ -128,8 +113,8 @@ eot_queue_t eot_queue_g = TAILQ_HEAD_INITIALIZER(eot_queue_g);
/* Local Variables */
/*******************/
-/* Declare a free list to manage the H5F_vfd_swmr_dl_entry_t struct */
-H5FL_DEFINE(H5F_vfd_swmr_dl_entry_t);
+/* Declare a free list to manage the old_image_t struct */
+H5FL_DEFINE(old_image_t);
/* Declare a free list to manage the eot_queue_entry_t struct */
H5FL_DEFINE(eot_queue_entry_t);
@@ -316,7 +301,7 @@ done:
herr_t
H5F_vfd_swmr_close_or_flush(H5F_t *f, hbool_t closing)
{
- H5F_vfd_swmr_dl_entry_t *curr, *next;
+ old_image_t *curr;
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(FAIL)
@@ -360,14 +345,12 @@ H5F_vfd_swmr_close_or_flush(H5F_t *f, hbool_t closing)
"unable to close the free-space manager for the metadata file")
/* Free the delayed list */
- for (curr = f->shared->dl_head_ptr;
- curr != NULL && (next = curr->next, true);
- curr = next) {
- H5FL_FREE(H5F_vfd_swmr_dl_entry_t, curr);
+ while ((curr = TAILQ_FIRST(&f->shared->old_images)) != NULL) {
+ TAILQ_REMOVE(&f->shared->old_images, curr, link);
+ H5FL_FREE(old_image_t, curr);
}
- f->shared->dl_head_ptr = f->shared->dl_tail_ptr = NULL;
-
+ assert(TAILQ_EMPTY(&f->shared->old_images));
} else { /* For file flush */
/* Update end_of_tick */
@@ -383,6 +366,26 @@ done:
} /* H5F_vfd_swmr_close_or_flush() */
+/*
+ * Record the image described by `entry` on shared->old_images.
+ *
+ * A zeroed old_image_t is allocated, filled in with the entry's HDF5
+ * page offset, metadata-file page offset and length, stamped with the
+ * current tick number (shared->tick_num), and inserted at the head of
+ * the queue.
+ *
+ * Returns 0 on success, -1 if the old_image_t could not be allocated.
+ */
+int
+vfd_swmr_idx_entry_defer_free(H5F_shared_t *shared,
+    const H5FD_vfd_swmr_idx_entry_t *entry)
+{
+    old_image_t *img = H5FL_CALLOC(old_image_t);
+
+    if (img == NULL)
+        return -1;
+
+    /* Log only when the queue is empty before this insertion. */
+    if (TAILQ_EMPTY(&shared->old_images))
+        ldbgf("Adding to the old images list.\n");
+
+    img->tick_num = shared->tick_num;
+    img->length = entry->length;
+    img->md_file_page_offset = entry->md_file_page_offset;
+    img->hdf5_page_offset = entry->hdf5_page_offset;
+
+    TAILQ_INSERT_HEAD(&shared->old_images, img, link);
+
+    return 0;
+}
/*-------------------------------------------------------------------------
@@ -421,14 +424,14 @@ done:
*/
herr_t
H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
- H5FD_vfd_swmr_idx_entry_t index[])
+ H5FD_vfd_swmr_idx_entry_t *index)
{
- H5F_vfd_swmr_dl_entry_t *prev; /* Points to the previous entry
- * in the delayed list
- */
- H5F_vfd_swmr_dl_entry_t *dl_entry; /* Points to an entry in the
- * delayed list
- */
+ old_image_t *prev; /* Points to the previous entry
+ * in the delayed list
+ */
+ old_image_t *old_image; /* Points to an entry in the
+ * delayed list
+ */
haddr_t md_addr; /* Address in the metadata file */
unsigned i; /* Local index variable */
herr_t ret_value = SUCCEED; /* Return value */
@@ -436,10 +439,16 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
FUNC_ENTER_NOAPI(FAIL)
/* Sort index entries by increasing offset in the HDF5 file */
- if ( num_entries ) {
-
- HDqsort(index, num_entries, sizeof(H5FD_vfd_swmr_idx_entry_t),
- H5F__idx_entry_cmp);
+ if (num_entries > 0) {
+ HDqsort(index, num_entries, sizeof(*index), H5F__idx_entry_cmp);
+#if 0
+ /* Assert that there are not any HDF5 page offsets duplicated in
+ * here.
+ */
+ for (i = 1; i < num_entries; i++) {
+ assert(index[i].hdf5_page_offset != index[i - 1].hdf5_page_offset);
+ }
+#endif
}
/* For each non-null entry_ptr in the index:
@@ -456,69 +465,60 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
*/
for ( i = 0; i < num_entries; i++ ) {
- if ( index[i].entry_ptr != NULL ) {
-
- /* Prepend previous image of the entry to the delayed list */
- if ( index[i].md_file_page_offset ) {
-
- if ( NULL == (dl_entry = H5FL_CALLOC(H5F_vfd_swmr_dl_entry_t)))
-
- HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, \
- "unable to allocate the delayed entry")
-
- dl_entry->hdf5_page_offset = index[i].hdf5_page_offset;
- dl_entry->md_file_page_offset = index[i].md_file_page_offset;
- dl_entry->length = index[i].length;
- dl_entry->tick_num = f->shared->tick_num;
-
- H5F__LL_PREPEND(dl_entry, f->shared->dl_head_ptr, f->shared->dl_tail_ptr);
- f->shared->dl_len++;
+ if (index[i].entry_ptr == NULL)
+ continue;
+
+ /* Prepend previous image of the entry to the delayed list */
+ if ( index[i].md_file_page_offset ) {
+ if (vfd_swmr_idx_entry_defer_free(f->shared, &index[i]) == -1) {
+ HGOTO_ERROR(H5E_FILE, H5E_CANTALLOC, FAIL, \
+ "unable to allocate the delayed entry")
}
+ }
- /* Allocate space for the entry in the metadata file */
- if((md_addr = H5MV_alloc(f, index[i].length)) == HADDR_UNDEF)
+ /* Allocate space for the entry in the metadata file */
+ if((md_addr = H5MV_alloc(f, index[i].length)) == HADDR_UNDEF)
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
- "error in allocating space from the metadata file")
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in allocating space from the metadata file")
- /* Compute checksum and update the index entry */
- index[i].md_file_page_offset = md_addr/f->shared->fs_page_size;
- index[i].chksum = H5_checksum_metadata(index[i].entry_ptr,
- (size_t)(index[i].length), 0);
+ /* Compute checksum and update the index entry */
+ index[i].md_file_page_offset = md_addr/f->shared->fs_page_size;
+ index[i].chksum = H5_checksum_metadata(index[i].entry_ptr,
+ index[i].length, 0);
#if 0 /* JRM */
- HDfprintf(stderr,
- "writing index[%d] fo/mdfo/l/chksum/fc/lc = %lld/%lld/%ld/%lx/%lx/%lx\n",
- i,
- index[i].hdf5_page_offset,
- index[i].md_file_page_offset,
- index[i].length,
- index[i].chksum,
- (((char*)(index[i].entry_ptr))[0]),
- (((char*)(index[i].entry_ptr))[4095]));
-
- HDassert(md_addr == index[i].md_file_page_offset *
- f->shared->fs_page_size);
- HDassert(f->shared->fs_page_size == 4096);
+ HDfprintf(stderr,
+ "writing index[%d] fo/mdfo/l/chksum/fc/lc = %lld/%lld/%ld/%lx/%lx/%lx\n",
+ i,
+ index[i].hdf5_page_offset,
+ index[i].md_file_page_offset,
+ index[i].length,
+ index[i].chksum,
+ (((char*)(index[i].entry_ptr))[0]),
+ (((char*)(index[i].entry_ptr))[4095]));
+
+ HDassert(md_addr == index[i].md_file_page_offset *
+ f->shared->fs_page_size);
+ HDassert(f->shared->fs_page_size == 4096);
#endif /* JRM */
- /* Seek and write the entry to the metadata file */
- if ( HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)md_addr,
- SEEK_SET) < 0)
+ /* Seek and write the entry to the metadata file */
+ if ( HDlseek(f->shared->vfd_swmr_md_fd, (HDoff_t)md_addr,
+ SEEK_SET) < 0)
- HGOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, \
- "unable to seek in the metadata file")
+ HGOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, \
+ "unable to seek in the metadata file")
- if ( HDwrite(f->shared->vfd_swmr_md_fd, index[i].entry_ptr,
- index[i].length) != (ssize_t)index[i].length )
+ if ( HDwrite(f->shared->vfd_swmr_md_fd, index[i].entry_ptr,
+ index[i].length) != (ssize_t)index[i].length )
- HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
- "error in writing the page/multi-page entry to metadata file")
+ HGOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, \
+ "error in writing the page/multi-page entry to metadata file")
- /* Set entry_ptr to NULL */
- index[i].entry_ptr = NULL;
+ /* Set entry_ptr to NULL */
+ index[i].entry_ptr = NULL;
- } /* end if */
} /* end for */
/* Construct and write index to the metadata file */
@@ -544,27 +544,25 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
* --remove the associated entries from the list
*/
- for (dl_entry = f->shared->dl_tail_ptr;
- dl_entry != NULL && (prev = dl_entry->prev, true);
- dl_entry = prev) {
+ TAILQ_FOREACH_REVERSE_SAFE(old_image, &f->shared->old_images,
+ old_image_queue, link, prev) {
/* max_lag is at least 3 */
if ( ( f->shared->tick_num > f->shared->vfd_swmr_config.max_lag ) &&
- ( dl_entry->tick_num <=
+ ( old_image->tick_num <=
f->shared->tick_num - f->shared->vfd_swmr_config.max_lag ) ) {
- if ( H5MV_free(f, dl_entry->md_file_page_offset *
- f->shared->fs_page_size, dl_entry->length) < 0 )
+ if ( H5MV_free(f, old_image->md_file_page_offset *
+ f->shared->fs_page_size, old_image->length) < 0 )
HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \
"unable to flush clean entry")
/* Remove the entry from the delayed list */
- H5F__LL_REMOVE(dl_entry, f->shared->dl_head_ptr, f->shared->dl_tail_ptr)
- f->shared->dl_len--;
+ TAILQ_REMOVE(&f->shared->old_images, old_image, link);
/* Free the delayed entry struct */
- H5FL_FREE(H5F_vfd_swmr_dl_entry_t, dl_entry);
+ H5FL_FREE(old_image_t, old_image);
} else {
@@ -572,6 +570,9 @@ H5F_update_vfd_swmr_metadata_file(H5F_t *f, uint32_t num_entries,
}
}
+ if (TAILQ_EMPTY(&f->shared->old_images))
+ ldbgf("Emptied the old images list.\n");
+
done:
FUNC_LEAVE_NOAPI(ret_value)
diff --git a/src/H5PB.c b/src/H5PB.c
index a5bf155..2a7e2ee 100644
--- a/src/H5PB.c
+++ b/src/H5PB.c
@@ -1133,6 +1133,37 @@ done:
} /* end H5PB_read() */
+/*
+ * Remove the shadow (metadata file) index entry for `page`, if there
+ * is one.
+ *
+ * The entry is looked up in shared->mdf_idx with
+ * vfd_swmr_pageno_to_mdf_idx_entry().  If none is found there is nothing
+ * to do.  On a VFD SWMR writer, an entry whose md_file_page_offset is
+ * non-zero is first passed to vfd_swmr_idx_entry_defer_free().  The
+ * entry is then deleted by sliding every entry after it down one slot,
+ * so the surviving entries keep their relative order.
+ *
+ * Returns 0 on success (including "no such entry") and -1 if the
+ * deferred free could not be recorded, in which case the index is left
+ * unmodified.
+ */
+static int
+vfd_swmr_mdf_idx_entry_remove(H5F_shared_t *shared, uint64_t page)
+{
+    ptrdiff_t idx_idx;
+    H5FD_vfd_swmr_idx_entry_t *idx_entry;
+
+    idx_entry = vfd_swmr_pageno_to_mdf_idx_entry(shared->mdf_idx,
+        shared->mdf_idx_entries_used, page);
+
+    if (idx_entry == NULL)
+        return 0;
+
+    if (shared->vfd_swmr_writer && idx_entry->md_file_page_offset != 0 &&
+        vfd_swmr_idx_entry_defer_free(shared, idx_entry) != 0)
+        return -1;
+
+    idx_idx = idx_entry - shared->mdf_idx;
+
+    /* Close the gap left by the removed entry.  Source and destination
+     * overlap, hence memmove().  When the removed entry is the last one
+     * there is nothing to move and decrementing the count suffices.
+     */
+    if (shared->mdf_idx_entries_used > idx_idx + 1) {
+        const size_t ntocopy =
+            (size_t)(shared->mdf_idx_entries_used - (idx_idx + 1));
+        memmove(&shared->mdf_idx[idx_idx],
+            &shared->mdf_idx[idx_idx + 1],
+            ntocopy * sizeof(shared->mdf_idx[idx_idx + 1]));
+    }
+    shared->mdf_idx_entries_used--;
+    return 0;
+}
+
/*-------------------------------------------------------------------------
*
@@ -1239,22 +1270,34 @@ H5PB_remove_entry(H5F_shared_t *shared, haddr_t addr)
HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL, "forced eviction failed")
- /* Do we need to remove the entry from the metadata file index in
- * the VFD SWMR case?
+ /* We need to remove the entry from the shadow file index in
+ * the VFD SWMR case, so do that next.
*
- * Probably yes -- suppose a page is deallocated, and a multipage
- * metadata entry is allocated at the same base address. This would
- * change the metadata file entry size.
+ * If a multipage metadata entry is deallocated, and a new, single-page
+ * metadata entry is allocated at the same base address, then
+ * the old shadow index entry will still tell the size of the previous
+ * image, which is greater than a page, and a shadow-file flush will
+ * access bytes past the end of the entry's image.
*
- * However, this is sufficiently improbably that it doesn't cause
- * problems (that I know of) at present.
+ * When we add code to allow entries
+ * to age out of the metadata file index, that may provide
+ * code that we can reuse to perform this invalidation.
*
- * Unless it does, hold off on this until we add code to allow entries
- * to age out of the metadata file index, as that will give us the
- * necessary infrastructure.
+ * It's also possible (I think) for the index-entry size to be set
+ * to one page, and then for a multipage entry to appear later at that
+ * same index entry. The recorded size will still say the same, but
+ * the image will be bigger. So the shadow file will never see the
+ * entire image written, just the first page of the image.
*
- * JRM -- 12/6/18
+ * XXX The H5PB__evict_entry() call immediately prior should have called
+ * XXX vfd_swmr_mdf_idx_entry_remove() for this page. Need to
+ * XXX move this detailed comment and delete the redundant call to
+ * XXX vfd_swmr_mdf_idx_entry_remove(), no?
*/
+ if (vfd_swmr_mdf_idx_entry_remove(shared, page) == -1) {
+ HGOTO_ERROR(H5E_PAGEBUF, H5E_SYSTEM, FAIL,
+ "failed to remove shadow index entry")
+ }
}
done:
@@ -2369,6 +2412,8 @@ H5PB__evict_entry(H5F_shared_t *shared, H5PB_entry_t *entry_ptr, hbool_t force)
/* remove the entry from the hash table */
H5PB__DELETE_FROM_INDEX(pb_ptr, entry_ptr, FAIL)
+ vfd_swmr_mdf_idx_entry_remove(shared, entry_ptr->page);
+
/* update stats for eviction */
H5PB__UPDATE_STATS_FOR_EVICTION(pb_ptr, entry_ptr)