summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhendersonHDF <jhenderson@hdfgroup.org>2024-03-18 01:48:16 (GMT)
committerGitHub <noreply@github.com>2024-03-18 01:48:16 (GMT)
commit840476ead85229ac4d7be1b6c9dd87ad5f8e3a07 (patch)
tree1ed470da7841f2de768dc76abcaf59d3511f018d
parentfa44de4cdbc125f644369834983c65811be12335 (diff)
downloadhdf5-develop.zip
hdf5-develop.tar.gz
hdf5-develop.tar.bz2
Fix an issue where the Subfiling VFD's context cache grows too large (#4159)develop
-rw-r--r--release_docs/RELEASE.txt15
-rw-r--r--src/H5FDsubfiling/H5FDsubfiling.c3
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.c33
-rw-r--r--src/H5FDsubfiling/H5subfiling_common.h1
4 files changed, 39 insertions, 13 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 2d8eb32..d82167e 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -591,6 +591,21 @@ Bug Fixes since HDF5-1.14.0 release
Library
-------
+ - Fixed an issue where the Subfiling VFD's context object cache could
+ grow too large
+
+ The Subfiling VFD keeps a cache of its internal context objects to
+ speed up access to a context object for a particular file, as well
+ as access to that object across multiple opens of the same file.
+ However, opening a large amount of files with the Subfiling VFD over
+ the course of an application's lifetime could cause this cache to grow
+ too large and result in the application running out of available MPI
+ communicator objects. On file close, the Subfiling VFD now simply
+ evicts context objects out of its cache and frees them. It is assumed
+ that multiple opens of a file will be a less common use case for the
+ Subfiling VFD, but this can be revisited if it proves to be an issue
+ for performance.
+
- Fixed error when overwriting certain nested variable length types
Previously, when using a datatype that included a variable length type
diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c
index 4c39f0f..9594f67 100644
--- a/src/H5FDsubfiling/H5FDsubfiling.c
+++ b/src/H5FDsubfiling/H5FDsubfiling.c
@@ -1358,6 +1358,9 @@ done:
H5MM_free(file_ptr->file_dir);
file_ptr->file_dir = NULL;
+ if (file_ptr->context_id >= 0 && H5_free_subfiling_object(file_ptr->context_id) < 0)
+ H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't free subfiling context object");
+
/* Release the file info */
file_ptr = H5FL_FREE(H5FD_subfiling_t, file_ptr);
diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c
index 1127ae0..47d0624 100644
--- a/src/H5FDsubfiling/H5subfiling_common.c
+++ b/src/H5FDsubfiling/H5subfiling_common.c
@@ -48,7 +48,6 @@ static int sf_file_map_size = 0;
#define DEFAULT_TOPOLOGY_CACHE_SIZE 4
#define DEFAULT_FILE_MAP_ENTRIES 8
-static herr_t H5_free_subfiling_object(int64_t object_id);
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);
@@ -280,18 +279,25 @@ done:
* Purpose: Frees the underlying subfiling object for a given subfiling
* object ID.
*
- * NOTE: Currently we assume that all created subfiling
- * objects are cached in the (very simple) context/topology
- * cache until application exit, so the only time a subfiling
- * object should be freed by this routine is if something
- * fails right after creating one. Otherwise, the internal
- * indexing for the relevant cache will be invalid.
+ * NOTE: Because we want to avoid the potentially large
+ * overhead of determining the application topology on every
+ * file open, we currently assume that all created subfiling
+ * topology objects are cached in the (very simple) topology
+ * cache until application exit. This allows us to quickly
+ * find and assign a cached topology object to a subfiling
+ * context object for a file when opened. Therefore, a
+ * subfiling topology object should (currently) only ever be
+ * freed by this routine if a function fails right after
+ * creating a topology object. Otherwise, the internal
+ * indexing for the topology cache will be invalid and we will
+ * either leak memory or assign invalid topology pointers to
+ * subfiling context objects after that point.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
-static herr_t
+herr_t
H5_free_subfiling_object(int64_t object_id)
{
int64_t obj_type = (object_id >> 32) & 0x0FFFF;
@@ -305,30 +311,31 @@ H5_free_subfiling_object(int64_t object_id)
"couldn't get subfiling context for subfiling object ID");
if (H5_free_subfiling_object_int(sf_context) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object");
assert(sf_context_cache_num_entries > 0);
assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]);
sf_context_cache[sf_context_cache_num_entries - 1] = NULL;
sf_context_cache_num_entries--;
}
- else {
+ else if (obj_type == SF_TOPOLOGY) {
sf_topology_t *sf_topology;
- assert(obj_type == SF_TOPOLOGY);
-
if (NULL == (sf_topology = H5_get_subfiling_object(object_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling context for subfiling object ID");
if (H5_free_subfiling_topology(sf_topology) < 0)
- H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology");
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling topology object");
assert(sf_topology_cache_num_entries > 0);
assert(sf_topology == sf_topology_cache[sf_topology_cache_num_entries - 1]);
sf_topology_cache[sf_topology_cache_num_entries - 1] = NULL;
sf_topology_cache_num_entries--;
}
+ else
+ H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
+ "couldn't free subfiling object - invalid object type");
done:
H5_SUBFILING_FUNC_LEAVE;
diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h
index 156902a..6b5cfa4 100644
--- a/src/H5FDsubfiling/H5subfiling_common.h
+++ b/src/H5FDsubfiling/H5subfiling_common.h
@@ -269,6 +269,7 @@ H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id, MPI_Comm file_comm
H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type);
H5_DLL void *H5_get_subfiling_object(int64_t object_id);
+H5_DLL herr_t H5_free_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *stripe_size,
int64_t *num_subfiles);
H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath);