summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2022-03-01 18:30:49 (GMT)
committerGitHub <noreply@github.com>2022-03-01 18:30:49 (GMT)
commit8e0b427bf2a22871318e1c3ca76717344ec0019f (patch)
treeb9284bf2f3f5326ffb3e02fa31f5eae0be728404
parent80954e82723086e573af37b31e85d0f3d562e9b3 (diff)
downloadhdf5-8e0b427bf2a22871318e1c3ca76717344ec0019f.zip
hdf5-8e0b427bf2a22871318e1c3ca76717344ec0019f.tar.gz
hdf5-8e0b427bf2a22871318e1c3ca76717344ec0019f.tar.bz2
Fix metadata cache bug when resizing a pinned/protected entry (v2) (#1463)
-rw-r--r--release_docs/RELEASE.txt26
-rw-r--r--src/H5AC.c73
2 files changed, 84 insertions, 15 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 2346e2f..1fd6f2d 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -922,15 +922,6 @@ New Features
* Improved coverage of regression testing for the feature
- NOTE: we are aware of and working on a fix for an assertion failure that
- occurs during regression testing of the feature. It looks similar to:
-
- hdf5/src/H5ACmpio.c:1089: H5AC__log_moved_entry: Assertion `!entry_dirty' failed.
-
- so far, this assertion failure has only occurred during testing and has
- not been seen in any production uses, but until this bug is fixed the
- feature should be considered unstable.
-
(JTH - 2022/2/23)
Fortran Library:
@@ -1138,6 +1129,23 @@ Bug Fixes since HDF5-1.12.0 release
===================================
Library
-------
+ - Fixed a metadata cache bug when resizing a pinned/protected cache entry
+
+ When resizing a pinned/protected cache entry, the metadata
+ cache code previously would wait until after resizing the
+ entry to attempt to log the newly-dirtied entry. This would
+ cause H5C_resize_entry to mark the entry as dirty and make
+ H5AC_resize_entry think that it doesn't need to add the
+ newly-dirtied entry to the dirty entries skiplist.
+
+ Thus, a subsequent H5AC__log_moved_entry would think it
+ needs to allocate a new entry for insertion into the dirty
+ entry skip list, since the entry doesn't exist on that list.
+ This causes an assertion failure, as the code to allocate a
+ new entry assumes that the entry is not dirty.
+
+ (JRM - 2022/02/28)
+
- Issue #1436 identified a problem with the H5_VERS_RELEASE check in the
H5check_version function.
diff --git a/src/H5AC.c b/src/H5AC.c
index 47d3a65..ac28a8c 100644
--- a/src/H5AC.c
+++ b/src/H5AC.c
@@ -1440,21 +1440,82 @@ H5AC_resize_entry(void *thing, size_t new_size)
cache_ptr = entry_ptr->cache_ptr;
HDassert(cache_ptr);
- /* Resize the entry */
- if (H5C_resize_entry(thing, new_size) < 0)
- HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")
-
#ifdef H5_HAVE_PARALLEL
- {
+ /* Log the generation of dirty bytes of metadata iff:
+ *
+ * 1) The entry is clean on entry, and this resize will dirty it
+ * (i.e. the current and new sizes are different), and
+ *
+ * 2) This is a parallel computation -- which it is if the aux_ptr
+ * is non-null.
+ *
+ * A few points to note about this section of the code:
+ *
+ * 1) This call must occur before the call to H5C_resize_entry() since
+ * H5AC__log_dirtied_entry() expects the target entry to be clean
+ * on entry.
+ *
+ * 2) This code has some basic issues in terms of the number of bytes
+ * added to the dirty bytes count.
+ *
+ * First, it adds the initial entry size to aux_ptr->dirty_bytes,
+ * not the final size. Note that this code used to use the final
+ * size, but code to support this has been removed from
+ * H5AC__log_dirtied_entry() for reasons unknown since I wrote this
+ * code.
+ *
+ * As long as all ranks do the same thing here, this probably doesn't
+ * matter much, although it will delay initiation of sync points.
+ *
+ * A more interesting point is that this code will not increment
+ * aux_ptr->dirty_bytes if a dirty entry is resized. At first glance
+ * this seems major, as particularly with the older file formats,
+ * resizes can be quite large. However, this is probably not an
+ * issue either, since such resizes will be accompanied by large
+ * amounts of dirty metadata creation in other areas -- which will
+ * cause aux_ptr->dirty_bytes to be incremented.
+ *
+ * The bottom line is that this code is probably OK, but the above
+ * points should be kept in mind.
+ *
+ * One final observation: This comment is occasioned by a bug caused
+ * by moving the call to H5AC__log_dirtied_entry() after the call to
+ * H5C_resize_entry(), and then only calling H5AC__log_dirtied_entry()
+ * if entry_ptr->is_dirty was false.
+ *
+ * Since H5C_resize_entry() marks the target entry dirty unless there
+ * is not change in size, this had the effect of not calling
+ * H5AC__log_dirtied_entry() when it should be, and corrupting
+ * the cleaned and dirtied lists used by rank 0 in the parallel
+ * version of the metadata cache.
+ *
+ * The point here is that you should be very careful when working with
+ * this code, and not modify it unless you fully understand it.
+ *
+ * JRM -- 2/28/22
+ */
+
+ if ((!entry_ptr->is_dirty) && (entry_ptr->size != new_size)) {
+
+ /* the entry is clean, and will be marked dirty in the resize
+ * operation.
+ */
H5AC_aux_t *aux_ptr;
aux_ptr = (H5AC_aux_t *)H5C_get_aux_ptr(cache_ptr);
- if ((!entry_ptr->is_dirty) && (NULL != aux_ptr))
+
+ if (NULL != aux_ptr) {
+
if (H5AC__log_dirtied_entry(entry_ptr) < 0)
HGOTO_ERROR(H5E_CACHE, H5E_CANTMARKDIRTY, FAIL, "can't log dirtied entry")
+ }
}
#endif /* H5_HAVE_PARALLEL */
+ /* Resize the entry */
+ if (H5C_resize_entry(thing, new_size) < 0)
+ HGOTO_ERROR(H5E_CACHE, H5E_CANTRESIZE, FAIL, "can't resize entry")
+
done:
/* If currently logging, generate a message */
if (cache_ptr != NULL && cache_ptr->log_info != NULL)