summaryrefslogtreecommitdiffstats
path: root/src/H5C.c
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2016-11-20 12:24:57 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2016-11-20 12:24:57 (GMT)
commit3b0c2b24da5689990c4bc0fcd3afecdf063086c8 (patch)
tree242d3501c98bef92e6befd920e8a9efdf2f1715b /src/H5C.c
parentf6ad126673553838df0dec514c5d2c1b4b70df1a (diff)
downloadhdf5-3b0c2b24da5689990c4bc0fcd3afecdf063086c8.zip
hdf5-3b0c2b24da5689990c4bc0fcd3afecdf063086c8.tar.gz
hdf5-3b0c2b24da5689990c4bc0fcd3afecdf063086c8.tar.bz2
Bring over support for retrying metadata cache entry loads, along with all the
supporting metadata cache callback changes, etc.
Diffstat (limited to 'src/H5C.c')
-rw-r--r--src/H5C.c373
1 files changed, 201 insertions, 172 deletions
diff --git a/src/H5C.c b/src/H5C.c
index a1477d4..147489b 100644
--- a/src/H5C.c
+++ b/src/H5C.c
@@ -167,6 +167,9 @@ static herr_t H5C__mark_flush_dep_dirty(H5C_cache_entry_t * entry);
static herr_t H5C__mark_flush_dep_clean(H5C_cache_entry_t * entry);
+static herr_t H5C__verify_len_eoa(H5F_t *f, const H5C_class_t * type,
+ haddr_t addr, size_t *len, hbool_t actual);
+
static herr_t H5C__generate_image(const H5F_t *f, H5C_t * cache_ptr, H5C_cache_entry_t *entry_ptr,
hid_t dxpl_id);
@@ -6136,6 +6139,70 @@ done:
/*-------------------------------------------------------------------------
*
+ * Function: H5C__verify_len_eoa
+ *
+ * Purpose: Verify that 'len' does not exceed eoa when 'actual' is
+ * false i.e. 'len" is the initial speculative length from
+ * get_load_size callback with null image pointer.
+ * If exceed, adjust 'len' accordingly.
+ *
+ * Verify that 'len' should not exceed eoa when 'actual' is
+ * true i.e. 'len' is the actual length from get_load_size
+ * callback with non-null image pointer.
+ * If exceed, return error.
+ *
+ * Return: FAIL if error is detected, SUCCEED otherwise.
+ *
+ * Programmer: Vailin Choi
+ * 9/6/15
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5C__verify_len_eoa(H5F_t *f, const H5C_class_t *type, haddr_t addr,
+ size_t *len, hbool_t actual)
+{
+ H5FD_mem_t cooked_type; /* Modified type, accounting for switching global heaps */
+ haddr_t eoa; /* End-of-allocation in the file */
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_STATIC
+
+ /* if type == H5FD_MEM_GHEAP, H5F_block_read() forces
+ * type to H5FD_MEM_DRAW via its call to H5F__accum_read().
+ * Thus we do the same for purposes of computing the EOA
+ * for sanity checks.
+ */
+ cooked_type = (type->mem_type == H5FD_MEM_GHEAP) ? H5FD_MEM_DRAW : type->mem_type;
+
+ /* Get the file's end-of-allocation value */
+ eoa = H5F_get_eoa(f, cooked_type);
+ if(!H5F_addr_defined(eoa))
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "invalid EOA address for file")
+
+ /* Check for bad address in general */
+ if(H5F_addr_gt(addr, eoa))
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "address of object past end of allocation")
+
+ /* Check if the amount of data to read will be past the EOA */
+ if(H5F_addr_gt((addr + *len), eoa)) {
+ if(actual)
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "actual len exceeds EOA.")
+ else
+ /* Trim down the length of the metadata */
+ *len = (size_t)(eoa - addr);
+ } /* end if */
+
+ if(*len <= 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, FAIL, "len not positive after adjustment for EOA.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* H5C__verify_len_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ *
* Function: H5C_load_entry
*
* Purpose: Attempt to load the entry at the specified disk address
@@ -6163,7 +6230,7 @@ H5C_load_entry(H5F_t * f,
void * udata)
{
hbool_t dirty = FALSE; /* Flag indicating whether thing was dirtied during deserialize */
- void * image = NULL; /* Buffer for disk image */
+ uint8_t * image = NULL; /* Buffer for disk image */
void * thing = NULL; /* Pointer to thing loaded */
H5C_cache_entry_t *entry = NULL; /* Alias for thing loaded, as cache entry */
size_t len; /* Size of image in file */
@@ -6176,10 +6243,18 @@ H5C_load_entry(H5F_t * f,
FUNC_ENTER_NOAPI_NOINIT
+ /* Sanity checks */
HDassert(f);
HDassert(f->shared);
HDassert(f->shared->cache);
HDassert(type);
+ HDassert(H5F_addr_defined(addr));
+ HDassert(type->get_initial_load_size);
+ if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG)
+ HDassert(type->get_final_load_size);
+ else
+ HDassert(NULL == type->get_final_load_size);
+ HDassert(type->deserialize);
/* Can't see how skip reads could be usefully combined with
* the speculative read flag. Hence disallow.
@@ -6187,59 +6262,19 @@ H5C_load_entry(H5F_t * f,
HDassert(!((type->flags & H5C__CLASS_SKIP_READS) &&
(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG)));
- HDassert(H5F_addr_defined(addr));
- HDassert(type->get_load_size);
- HDassert(type->deserialize);
-
- /* Call the get_load_size callback, to retrieve the initial
- * size of image
- */
- if(type->get_load_size(udata, &len) < 0)
+ /* Call the get_initial_load_size callback, to retrieve the initial size of image */
+ if(type->get_initial_load_size(udata, &len) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "can't retrieve image size")
-
HDassert(len > 0);
/* Check for possible speculative read off the end of the file */
- if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) {
-
- haddr_t eoa; /* End-of-allocation in the file */
- H5FD_mem_t cooked_type;
-
- /* if type == H5FD_MEM_GHEAP, H5F_block_read() forces
- * type to H5FD_MEM_DRAW via its call to H5F__accum_read().
- * Thus we do the same for purposes of computing the eoa
- * for sanity checks.
- */
- cooked_type = (type->mem_type == H5FD_MEM_GHEAP) ? H5FD_MEM_DRAW : type->mem_type;
-
- /* Get the file's end-of-allocation value */
- eoa = H5F_get_eoa(f, cooked_type);
-
- HDassert(H5F_addr_defined(eoa));
-
- /* Check for bad address in general */
- if ( H5F_addr_gt(addr, eoa) )
- HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "address of object past end of allocation")
-
- /* Check if the amount of data to read will be past the eoa */
- if( H5F_addr_gt((addr + len), eoa) ) {
-
- /* Trim down the length of the metadata */
- /* Note that for some cache clients, this will cause an
- * assertion failure. JRM -- 8/29/14
- */
- len = (size_t)(eoa - addr);
- } /* end if */
-
- if ( len <= 0 )
- HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "len not positive after adjustment for EOA.")
-
- } /* end if */
+ if(type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG)
+ if(H5C__verify_len_eoa(f, type, addr, &len, FALSE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "invalid len with respect to EOA")
/* Allocate the buffer for reading the on-disk entry image */
- if(NULL == (image = H5MM_malloc(len + H5C_IMAGE_EXTRA_SPACE)))
+ if(NULL == (image = (uint8_t *)H5MM_malloc(len + H5C_IMAGE_EXTRA_SPACE)))
HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "memory allocation failed for on disk image buffer.")
-
#if H5C_DO_MEMORY_SANITY_CHECKS
HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
@@ -6255,150 +6290,144 @@ H5C_load_entry(H5F_t * f,
/* Get the on-disk entry image */
if(0 == (type->flags & H5C__CLASS_SKIP_READS)) {
+ unsigned tries, max_tries; /* The # of read attempts */
+ unsigned retries; /* The # of retries */
+ htri_t chk_ret; /* return from verify_chksum callback */
+ size_t actual_len = len; /* The actual length, after speculative reads have been resolved */
+ uint64_t nanosec = 1; /* # of nanoseconds to sleep between retries */
+ void *new_image; /* Pointer to image */
+ hbool_t len_changed = TRUE; /* Whether to re-check speculative entries */
+
+ /* Get the # of read attempts */
+ max_tries = tries = H5F_GET_READ_ATTEMPTS(f);
+
+ /*
+ * This do/while loop performs the following till the metadata checksum
+ * is correct or the file's number of allowed read attempts are reached.
+ * --read the metadata
+ * --determine the actual size of the metadata
+ * --perform checksum verification
+ */
+ do {
+ if(actual_len != len) {
+ if(NULL == (new_image = H5MM_realloc(image, len + H5C_IMAGE_EXTRA_SPACE)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "image null after H5MM_realloc()")
+ image = (uint8_t *)new_image;
+#if H5C_DO_MEMORY_SANITY_CHECKS
+ HDmemcpy(image + len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
+#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
+ } /* end if */
+
#ifdef H5_HAVE_PARALLEL
- if(!coll_access || 0 == mpi_rank) {
+ if(!coll_access || 0 == mpi_rank) {
#endif /* H5_HAVE_PARALLEL */
-
- if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*")
-
+ if(H5F_block_read(f, type->mem_type, addr, len, dxpl_id, image) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "Can't read image*")
#ifdef H5_HAVE_PARALLEL
- } /* end if */
-
- /* if the collective metadata read optimization is turned on,
- * bcast the metadata read from process 0 to all ranks in the file
- * communicator
- */
- if(coll_access) {
- int buf_size;
+ } /* end if */
+ /* if the collective metadata read optimization is turned on,
+ * bcast the metadata read from process 0 to all ranks in the file
+ * communicator
+ */
+ if(coll_access) {
+ int buf_size;
- H5_CHECKED_ASSIGN(buf_size, int, len, size_t);
- if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm)))
- HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
- } /* end if */
+ H5_CHECKED_ASSIGN(buf_size, int, len, size_t);
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
- } /* end if */
-
- /* Deserialize the on-disk image into the native memory form */
- if(NULL == (thing = type->deserialize(image, len, udata, &dirty)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't deserialize image")
-
- /* If the client's cache has an image_len callback, check it */
- if(type->image_len) {
- size_t new_len; /* New size of on-disk image */
-
- /* set magic and type field in *entry_ptr. While the image_len
- * callback shouldn't touch the cache specific fields, it may check
- * these fields to ensure that it it has received the expected
- * value.
- *
- * Note that this initialization is repeated below on the off
- * chance that we had to re-try the deserialization.
- */
- entry = (H5C_cache_entry_t *)thing;
- entry->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC;
- entry->type = type;
-
- /* Get the actual image size for the thing */
- if(type->image_len(thing, &new_len) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "can't retrieve image length")
-
- if(new_len == 0)
- HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "image length is 0")
-
- if(new_len != len) {
-
- if (type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) {
-
- void *new_image; /* Buffer for disk image */
-
- /* Adjust the size of the image to match new_len */
- if(NULL == (new_image = H5MM_realloc(image,
- new_len + H5C_IMAGE_EXTRA_SPACE)))
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "image null after H5MM_realloc()")
-
- image = new_image;
+ /* If the entry could be read speculatively and the length is still
+ * changing, check for updating the actual size
+ */
+ if((type->flags & H5C__CLASS_SPECULATIVE_LOAD_FLAG) && len_changed) {
+ /* Retrieve the actual length */
+ actual_len = len;
+ if(type->get_final_load_size(image, len, udata, &actual_len) < 0)
+ continue; /* Transfer control to while() and count towards retries */
+
+ /* Check for the length changing */
+ if(actual_len != len) {
+ /* Verify that the length isn't past the EOA for the file */
+ if(H5C__verify_len_eoa(f, type, addr, &actual_len, TRUE) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "actual_len exceeds EOA.")
+
+ /* Expand buffer to new size */
+ if(NULL == (new_image = H5MM_realloc(image, actual_len + H5C_IMAGE_EXTRA_SPACE)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, NULL, "image null after H5MM_realloc()")
+ image = (uint8_t *)new_image;
#if H5C_DO_MEMORY_SANITY_CHECKS
- HDmemcpy(((uint8_t *)image) + new_len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
+ HDmemcpy(image + actual_len, H5C_IMAGE_SANITY_VALUE, H5C_IMAGE_EXTRA_SPACE);
#endif /* H5C_DO_MEMORY_SANITY_CHECKS */
- /* If the thing's image needs to be bigger for a speculatively
- * loaded thing, free the thing and retry with new length
- */
- if (new_len > len) {
-
- /* Release previous (possibly partially initialized)
- * thing. Note that we must set entry->magic to
- * H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC and set one or
- * two other fields before the call to free_icr
- * so as to avoid sanity check failures.
- */
- entry->magic = H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC;
-
- entry->addr = addr;
-
- if ( type->free_icr(thing) < 0 )
-
- HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "free_icr callback failed")
-
+ if(actual_len > len) {
#ifdef H5_HAVE_PARALLEL
- if(!coll_access || 0 == mpi_rank) {
+ if(!coll_access || 0 == mpi_rank) {
#endif /* H5_HAVE_PARALLEL */
-
- /* Go get the on-disk image again */
- if(H5F_block_read(f, type->mem_type, addr,
- new_len, dxpl_id, image) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't read image")
-
+ /* If the thing's image needs to be bigger for a speculatively
+ * loaded thing, go get the on-disk image again (the extra portion).
+ */
+ if(H5F_block_read(f, type->mem_type, addr + len, actual_len - len, dxpl_id, image + len) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "can't read image")
#ifdef H5_HAVE_PARALLEL
- }
- /* if the collective metadata read optimization is turned on,
- bcast the metadata read from process 0 to all ranks in the file
- communicator */
- if(coll_access) {
- int buf_size;
-
- H5_CHECKED_ASSIGN(buf_size, int, new_len, size_t);
- if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image, buf_size, MPI_BYTE, 0, comm)))
- HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
- } /* end if */
+ }
+ /* If the collective metadata read optimization is turned on,
+ * Bcast the metadata read from process 0 to all ranks in the file
+ * communicator */
+ if(coll_access) {
+ int buf_size;
+
+ H5_CHECKED_ASSIGN(buf_size, int, actual_len - len, size_t);
+ if(MPI_SUCCESS != (mpi_code = MPI_Bcast(image + len, buf_size, MPI_BYTE, 0, comm)))
+ HMPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code)
+ } /* end if */
#endif /* H5_HAVE_PARALLEL */
+ } /* end if */
+ } /* end if (actual_len != len) */
+ else {
+ /* The length has stabilized */
+ len_changed = FALSE;
- /* Deserialize on-disk image into native memory form again */
- if(NULL == (thing = type->deserialize(image, new_len, udata, &dirty)))
- HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't deserialize image")
+ /* Set the final length */
+ len = actual_len;
+ } /* else */
+ } /* end if */
-#ifndef NDEBUG
- {
- size_t new_new_len;
+ /* If there's no way to verify the checksum for a piece of metadata
+ * (usually because there's no checksum in the file), leave now
+ */
+ if(type->verify_chksum == NULL)
+ break;
- /* Get the actual image size for the thing again. Note
- * that since this is a new thing, we have to set
- * the magic and type fields again so as to avoid
- * failing sanity checks.
- */
- entry = (H5C_cache_entry_t *)thing;
- entry->magic = H5C__H5C_CACHE_ENTRY_T_MAGIC;
- entry->type = type;
+ /* Verify the checksum for the metadata image */
+ if((chk_ret = type->verify_chksum(image, actual_len, udata)) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, NULL, "failure from verify_chksum callback")
+ if(chk_ret == TRUE)
+ break;
- type->image_len(thing, &new_new_len);
- HDassert(new_new_len == new_len);
- } /* end block */
-#endif /* NDEBUG */
- } /* end if (new_len > len) */
+ /* Sleep for some time */
+ H5_nanosleep(nanosec);
+ nanosec *= 2; /* Double the sleep time next time */
+ } while(--tries);
- /* Retain adjusted size */
- len = new_len;
+ /* Check for too many tries */
+ if(tries == 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_READERROR, NULL, "incorrect metadatda checksum after all read attempts")
- } else { /* throw an error */
+ /* Calculate and track the # of retries */
+ retries = max_tries - tries;
+ if(retries) /* Does not track 0 retry */
+ if(H5F_track_metadata_read_retries(f, (unsigned)type->mem_type, retries) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_BADVALUE, NULL, "cannot track read tries = %u ", retries)
- HGOTO_ERROR(H5E_CACHE, H5E_UNSUPPORTED, NULL, \
- "size of non-speculative object changed")
- }
- } /* end if (new_len != len) */
- } /* end if */
+ /* Set the final length (in case it wasn't set earlier) */
+ len = actual_len;
+ } /* end if !H5C__CLASS_SKIP_READS */
+
+ /* Deserialize the on-disk image into the native memory form */
+ if(NULL == (thing = type->deserialize(image, len, udata, &dirty)))
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "Can't deserialize image")
entry = (H5C_cache_entry_t *)thing;
@@ -6480,7 +6509,7 @@ done:
if(thing && type->free_icr(thing) < 0)
HDONE_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "free_icr callback failed")
if(image)
- image = H5MM_xfree(image);
+ image = (uint8_t *)H5MM_xfree(image);
} /* end if */
FUNC_LEAVE_NOAPI(ret_value)