summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--release_docs/RELEASE.txt16
-rw-r--r--tools/src/h5repack/h5repack_copy.c13
2 files changed, 27 insertions, 2 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index aa48654..b226664 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -184,7 +184,21 @@ Bug Fixes since HDF5-1.14.1 release
Tools
-----
- -
+ - Fixed an issue in h5repack for variable-length typed datasets
+
+ When repacking datasets into a new file, h5repack tries to determines whether
+ it can use H5Ocopy to copy each dataset into the new file, or if it needs to
+ manually re-create the dataset, then read data from the old dataset and write
+ it to the new dataset. H5repack was previously using H5Ocopy for datasets with
+ variable-length datatypes, but this can be problematic if the global heap
+ addresses involved do not match exactly between the old and new files. These
+ addresses could change for a variety of reasons, such as the command-line options
+ provided to h5repack, how h5repack allocate space in the repacked file, etc.
+ Since H5Ocopy does not currently perform any translation when these addresses
+ change, datasets that were repacked with H5Ocopy could become unreadable in the
+ new file. H5repack has been fixed to repack variable-length typed datasets without
+ using H5Ocopy to ensure that the new datasets always have the correct global heap
+ addresses.
Performance
diff --git a/tools/src/h5repack/h5repack_copy.c b/tools/src/h5repack/h5repack_copy.c
index 0f270c5..81b33c3 100644
--- a/tools/src/h5repack/h5repack_copy.c
+++ b/tools/src/h5repack/h5repack_copy.c
@@ -653,6 +653,7 @@ do_copy_objects(hid_t fidin, hid_t fidout, trav_table_t *travt, pack_opt_t *opti
int ifil;
int is_ref = 0;
htri_t is_named;
+ htri_t is_vlen = 0;
hbool_t limit_maxdims;
hsize_t size_dset;
int ret_value = 0;
@@ -806,6 +807,10 @@ do_copy_objects(hid_t fidin, hid_t fidout, trav_table_t *travt, pack_opt_t *opti
if (H5T_REFERENCE == H5Tget_class(ftype_id))
is_ref = 1;
+ /* early detection of variable-length types */
+ if ((is_vlen = H5Tdetect_class(ftype_id, H5T_VLEN)) < 0)
+ H5TOOLS_GOTO_ERROR((-1), "H5Tdetect_class failed");
+
/* Check if the datatype is committed */
if ((is_named = H5Tcommitted(ftype_id)) < 0)
H5TOOLS_GOTO_ERROR((-1), "H5Tcommitted failed");
@@ -823,10 +828,16 @@ do_copy_objects(hid_t fidin, hid_t fidout, trav_table_t *travt, pack_opt_t *opti
* check if we should use H5Ocopy or not
* if there is a request for filters/layout, we read/write the object
* otherwise we do a copy using H5Ocopy
+ *
+ * Note that H5Ocopy is currently unsafe to use for objects that reside in
+ * or interact with global heaps, such as variable-length datatypes. This
+ * appears to be due to H5Ocopy not correctly translating in the case where
+ * these objects move to different global heap addresses in the repacked
+ * file.
*-------------------------------------------------------------------------
*/
use_h5ocopy = !(options->op_tbl->nelems || options->all_filter == 1 ||
- options->all_layout == 1 || is_ref || is_named);
+ options->all_layout == 1 || is_ref || is_vlen || is_named);
/*
* Check if we are using different source and destination VOL connectors.