diff options
-rw-r--r-- | CMakeLists.txt | 12 | ||||
-rw-r--r-- | MANIFEST | 2 | ||||
-rw-r--r-- | config/cmake/H5pubconf.h.in | 3 | ||||
-rw-r--r-- | configure.ac | 17 | ||||
-rw-r--r-- | examples/CMakeLists.txt | 42 | ||||
-rw-r--r-- | examples/CMakeTests.cmake | 40 | ||||
-rw-r--r-- | examples/Makefile.am | 6 | ||||
-rw-r--r-- | examples/ph5_filtered_writes.c | 482 | ||||
-rw-r--r-- | examples/ph5_filtered_writes_no_sel.c | 366 | ||||
-rw-r--r-- | examples/ph5example.c | 8 | ||||
-rw-r--r-- | release_docs/RELEASE.txt | 40 | ||||
-rw-r--r-- | src/H5Dchunk.c | 378 | ||||
-rw-r--r-- | src/H5Dint.c | 55 | ||||
-rw-r--r-- | src/H5Dio.c | 128 | ||||
-rw-r--r-- | src/H5Dmpio.c | 5243 | ||||
-rw-r--r-- | src/H5Dpkg.h | 17 | ||||
-rw-r--r-- | src/H5Dselect.c | 187 | ||||
-rw-r--r-- | src/H5FDmpio.c | 44 | ||||
-rw-r--r-- | src/H5Fmpi.c | 64 | ||||
-rw-r--r-- | src/H5Fprivate.h | 3 | ||||
-rw-r--r-- | src/H5mpi.c | 233 | ||||
-rw-r--r-- | src/H5private.h | 30 | ||||
-rw-r--r-- | src/H5public.h | 7 | ||||
-rw-r--r-- | testpar/t_2Gio.c | 275 | ||||
-rw-r--r-- | testpar/t_dset.c | 277 | ||||
-rw-r--r-- | testpar/t_filters_parallel.c | 4584 | ||||
-rw-r--r-- | testpar/t_filters_parallel.h | 117 | ||||
-rw-r--r-- | testpar/testphdf5.h | 4 |
28 files changed, 9308 insertions, 3356 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index fedce44..cad378b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -708,10 +708,14 @@ if (HDF5_ENABLE_PARALLEL) # Used by Parallel Compression feature set (PARALLEL_FILTERED_WRITES ON) - CHECK_SYMBOL_EXISTS (MPI_Mprobe "mpi.h" H5_HAVE_MPI_Mprobe) - CHECK_SYMBOL_EXISTS (MPI_Imrecv "mpi.h" H5_HAVE_MPI_Imrecv) - if (NOT H5_HAVE_MPI_Mprobe OR NOT H5_HAVE_MPI_Imrecv) - message (WARNING "The MPI_Mprobe and/or MPI_Imrecv functions could not be located. + CHECK_SYMBOL_EXISTS (MPI_Ibarrier "mpi.h" H5_HAVE_MPI_Ibarrier) + CHECK_SYMBOL_EXISTS (MPI_Issend "mpi.h" H5_HAVE_MPI_Issend) + CHECK_SYMBOL_EXISTS (MPI_Iprobe "mpi.h" H5_HAVE_MPI_Iprobe) + CHECK_SYMBOL_EXISTS (MPI_Irecv "mpi.h" H5_HAVE_MPI_Irecv) + if (H5_HAVE_MPI_Ibarrier AND H5_HAVE_MPI_Issend AND H5_HAVE_MPI_Iprobe AND H5_HAVE_MPI_Irecv) + set (H5_HAVE_PARALLEL_FILTERED_WRITES 1) + else () + message (WARNING "The MPI_Ibarrier/MPI_Issend/MPI_Iprobe/MPI_Irecv functions could not be located. Parallel writes of filtered data will be disabled.") set (PARALLEL_FILTERED_WRITES OFF) endif () @@ -333,6 +333,8 @@ ./examples/h5_ref2reg_deprec.c ./examples/h5_shared_mesg.c ./examples/ph5example.c +./examples/ph5_filtered_writes.c +./examples/ph5_filtered_writes_no_sel.c ./examples/h5_vds.c ./examples/h5_vds-exc.c ./examples/h5_vds-exclim.c diff --git a/config/cmake/H5pubconf.h.in b/config/cmake/H5pubconf.h.in index 1f7f4b1..4956c97 100644 --- a/config/cmake/H5pubconf.h.in +++ b/config/cmake/H5pubconf.h.in @@ -265,6 +265,9 @@ /* Define if we have parallel support */ #cmakedefine H5_HAVE_PARALLEL @H5_HAVE_PARALLEL@ +/* Define if we have support for writing to filtered datasets in parallel */ +#cmakedefine H5_HAVE_PARALLEL_FILTERED_WRITES @H5_HAVE_PARALLEL_FILTERED_WRITES@ + /* Define if both pread and pwrite exist. */ #cmakedefine H5_HAVE_PREADWRITE @H5_HAVE_PREADWRITE@ diff --git a/configure.ac b/configure.ac index 4cf329c..55468bf 100644 --- a/configure.ac +++ b/configure.ac @@ -2987,11 +2987,11 @@ if test -n "$PARALLEL"; then fi ## ---------------------------------------------------------------------- - ## Check for the MPI-3 functions necessary for the Parallel Compression + ## Check for the MPI functions necessary for the Parallel Compression ## feature. If these are not present, issue a warning that Parallel ## Compression will be disabled. ## - AC_MSG_CHECKING([for MPI_Mprobe and MPI_Imrecv functions]) + AC_MSG_CHECKING([for MPI_Ibarrier/MPI_Issend/MPI_Iprobe/MPI_Irecv functions]) AC_LINK_IFELSE( [AC_LANG_PROGRAM( @@ -2999,16 +2999,19 @@ if test -n "$PARALLEL"; then #include <mpi.h> ]], [[ - MPI_Message message; + int flag; MPI_Init(0, (void *) 0); - MPI_Mprobe(0, 0, 0, &message, (void *) 0); - MPI_Imrecv((void *) 0, 0, 0, (void *) 0, (void *) 0); + MPI_Ibarrier(0, (void *) 0); + MPI_Issend((void *) 0, 0, 0, 0, 0, 0, (void *) 0); + MPI_Iprobe(0, 0, 0, &flag, (void *) 0); + MPI_Irecv((void *) 0, 0, 0, 0, 0, 0, (void *) 0); ]] )], [AC_MSG_RESULT([yes]) - PARALLEL_FILTERED_WRITES=yes], + PARALLEL_FILTERED_WRITES=yes + AC_DEFINE([HAVE_PARALLEL_FILTERED_WRITES], [1], [Define if we have support for writing to filtered datasets in parallel])], [AC_MSG_RESULT([no]) - AC_MSG_WARN([A simple MPI program using the MPI_Mprobe and MPI_Imrecv functions could not be compiled and linked. + AC_MSG_WARN([A simple MPI program using the MPI_Ibarrier, MPI_Issend, MPI_Iprobe and MPI_Irecv functions could not be compiled and linked. Parallel writes of filtered data will be disabled.]) PARALLEL_FILTERED_WRITES=no] ) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9ab870f..3f329c1 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -42,6 +42,14 @@ set (examples h5_vds-percival-unlim-maxmin ) +if (H5_HAVE_PARALLEL) + set (parallel_examples + ph5example + ph5_filtered_writes + ph5_filtered_writes_no_sel + ) +endif () + foreach (example ${examples}) add_executable (${example} ${HDF5_EXAMPLES_SOURCE_DIR}/${example}.c) target_include_directories (${example} PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") @@ -63,23 +71,25 @@ foreach (example ${examples}) endforeach () if (H5_HAVE_PARALLEL) - add_executable (ph5example ${HDF5_EXAMPLES_SOURCE_DIR}/ph5example.c) - target_include_directories (ph5example PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") - if (NOT BUILD_SHARED_LIBS) - TARGET_C_PROPERTIES (ph5example STATIC) - target_link_libraries (ph5example PRIVATE ${HDF5_LIB_TARGET} ${MPI_C_LIBRARIES}) - else () - TARGET_C_PROPERTIES (ph5example SHARED) - target_link_libraries (ph5example PRIVATE ${HDF5_LIBSH_TARGET} ${MPI_C_LIBRARIES}) - endif () - set_target_properties (ph5example PROPERTIES FOLDER examples) + foreach (parallel_example ${parallel_examples}) + add_executable (${parallel_example} ${HDF5_EXAMPLES_SOURCE_DIR}/${parallel_example}.c) + target_include_directories (${parallel_example} PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>") + if (NOT BUILD_SHARED_LIBS) + TARGET_C_PROPERTIES (${parallel_example} STATIC) + target_link_libraries (${parallel_example} PRIVATE ${HDF5_LIB_TARGET} ${MPI_C_LIBRARIES}) + else () + TARGET_C_PROPERTIES (${parallel_example} SHARED) + target_link_libraries (${parallel_example} PRIVATE ${HDF5_LIBSH_TARGET} ${MPI_C_LIBRARIES}) + endif () + set_target_properties (${parallel_example} PROPERTIES FOLDER examples) - #----------------------------------------------------------------------------- - # Add Target to clang-format - #----------------------------------------------------------------------------- - if (HDF5_ENABLE_FORMATTERS) - clang_format (HDF5_EXAMPLES_ph5example_FORMAT ph5example) - endif () + #----------------------------------------------------------------------------- + # Add Target to clang-format + #----------------------------------------------------------------------------- + if (HDF5_ENABLE_FORMATTERS) + clang_format (HDF5_EXAMPLES_${parallel_example}_FORMAT ${parallel_example}) + endif () + endforeach () endif () if (BUILD_TESTING AND HDF5_TEST_EXAMPLES) diff --git a/examples/CMakeTests.cmake b/examples/CMakeTests.cmake index 70142c8..3e24ba0 100644 --- a/examples/CMakeTests.cmake +++ b/examples/CMakeTests.cmake @@ -101,22 +101,26 @@ if (H5_HAVE_PARALLEL AND HDF5_TEST_PARALLEL AND NOT WIN32) # Ensure that 24 is a multiple of the number of processes. # The number 24 corresponds to SPACE1_DIM1 and SPACE1_DIM2 defined in ph5example.c math(EXPR NUMPROCS "24 / ((24 + ${MPIEXEC_MAX_NUMPROCS} - 1) / ${MPIEXEC_MAX_NUMPROCS})") - if (HDF5_ENABLE_USING_MEMCHECKER) - add_test (NAME MPI_TEST_EXAMPLES-ph5example COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:ph5example> ${MPIEXEC_POSTFLAGS}) - else () - add_test (NAME MPI_TEST_EXAMPLES-ph5example COMMAND "${CMAKE_COMMAND}" - -D "TEST_PROGRAM=${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${NUMPROCS};${MPIEXEC_PREFLAGS};$<TARGET_FILE:ph5example>;${MPIEXEC_POSTFLAGS}" - -D "TEST_ARGS:STRING=" - -D "TEST_EXPECT=0" - -D "TEST_OUTPUT=ph5example.out" - -D "TEST_REFERENCE:STRING=PHDF5 tests finished with no errors" - -D "TEST_FILTER:STRING=PHDF5 tests finished with no errors" - -D "TEST_FOLDER=${PROJECT_BINARY_DIR}" - -P "${HDF_RESOURCES_EXT_DIR}/grepTest.cmake" - ) - endif () - if (last_test) - set_tests_properties (MPI_TEST_EXAMPLES-ph5example PROPERTIES DEPENDS ${last_test}) - endif () - set (last_test "MPI_TEST_EXAMPLES-ph5example") + + foreach (parallel_example ${parallel_examples}) + if (HDF5_ENABLE_USING_MEMCHECKER) + add_test (NAME MPI_TEST_EXAMPLES-${parallel_example} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:${parallel_example}> ${MPIEXEC_POSTFLAGS}) + else () + add_test (NAME MPI_TEST_EXAMPLES-${parallel_example} COMMAND "${CMAKE_COMMAND}" + -D "TEST_PROGRAM=${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${NUMPROCS};${MPIEXEC_PREFLAGS};$<TARGET_FILE:${parallel_example}>;${MPIEXEC_POSTFLAGS}" + -D "TEST_ARGS:STRING=" + -D "TEST_EXPECT=0" + -D "TEST_SKIP_COMPARE=TRUE" + -D "TEST_OUTPUT=${parallel_example}.out" + -D "TEST_REFERENCE:STRING=PHDF5 example finished with no errors" + #-D "TEST_FILTER:STRING=PHDF5 tests finished with no errors" + -D "TEST_FOLDER=${PROJECT_BINARY_DIR}" + -P "${HDF_RESOURCES_EXT_DIR}/grepTest.cmake" + ) + endif () + if (last_test) + set_tests_properties (MPI_TEST_EXAMPLES-${parallel_example} PROPERTIES DEPENDS ${last_test}) + endif () + set (last_test "MPI_TEST_EXAMPLES-${parallel_example}") + endforeach () endif () diff --git a/examples/Makefile.am b/examples/Makefile.am index 7b5aa63..161f789 100644 --- a/examples/Makefile.am +++ b/examples/Makefile.am @@ -20,7 +20,7 @@ include $(top_srcdir)/config/commence.am if BUILD_PARALLEL_CONDITIONAL - EXAMPLE_PROG_PARA = ph5example + EXAMPLE_PROG_PARA = ph5example ph5_filtered_writes ph5_filtered_writes_no_sel endif INSTALL_SCRIPT_FILES = run-c-ex.sh @@ -50,7 +50,7 @@ INSTALL_FILES = h5_write.c h5_read.c h5_extend_write.c h5_chunk_read.c h5_compou h5_group.c h5_select.c h5_attribute.c h5_mount.c h5_drivers.c \ h5_reference_deprec.c h5_ref_extern.c h5_ref_compat.c h5_ref2reg_deprec.c \ h5_extlink.c h5_elink_unix2win.c h5_shared_mesg.c h5_debug_trace.c \ - ph5example.c \ + ph5example.c ph5_filtered_writes.c ph5_filtered_writes_no_sel.c \ h5_vds.c h5_vds-exc.c h5_vds-exclim.c h5_vds-eiger.c h5_vds-simpleIO.c \ h5_vds-percival.c h5_vds-percival-unlim.c h5_vds-percival-unlim-maxmin.c @@ -119,6 +119,8 @@ h5_reference_deprec: $(srcdir)/h5_reference_deprec.c h5_ref2reg_deprec: $(srcdir)/h5_ref2reg_deprec.c h5_drivers: $(srcdir)/h5_drivers.c ph5example: $(srcdir)/ph5example.c +ph5_filtered_writes: $(srcdir)/ph5_filtered_writes.c +ph5_filtered_writes_no_sel: $(srcdir)/ph5_filtered_writes_no_sel.c h5_dtransform: $(srcdir)/h5_dtransform.c h5_extlink: $(srcdir)/h5_extlink.c $(EXTLINK_DIRS) h5_elink_unix2win: $(srcdir)/h5_elink_unix2win.c $(EXTLINK_DIRS) diff --git a/examples/ph5_filtered_writes.c b/examples/ph5_filtered_writes.c new file mode 100644 index 0000000..8b55528 --- /dev/null +++ b/examples/ph5_filtered_writes.c @@ -0,0 +1,482 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Example of using the parallel HDF5 library to write to datasets + * with filters applied to them. + * + * If the HDF5_NOCLEANUP environment variable is set, the file that + * this example creates will not be removed as the example finishes. + * + * The need of requirement of parallel file prefix is that in general + * the current working directory in which compiling is done, is not suitable + * for parallel I/O and there is no standard pathname for parallel file + * systems. In some cases, the parallel file name may even need some + * parallel file type prefix such as: "pfs:/GF/...". Therefore, this + * example parses the HDF5_PARAPREFIX environment variable for a prefix, + * if one is needed. + */ + +#include <stdlib.h> + +#include "hdf5.h" + +#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES) + +#define EXAMPLE_FILE "ph5_filtered_writes.h5" +#define EXAMPLE_DSET1_NAME "DSET1" +#define EXAMPLE_DSET2_NAME "DSET2" + +#define EXAMPLE_DSET_DIMS 2 +#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10 + +/* Dataset datatype */ +#define HDF5_DATATYPE H5T_NATIVE_INT +typedef int C_DATATYPE; + +/* Global variables */ +int mpi_rank, mpi_size; + +/* + * Routine to set an HDF5 filter on the given DCPL + */ +static void +set_filter(hid_t dcpl_id) +{ + htri_t filter_avail; + + /* + * Check if 'deflate' filter is available + */ + filter_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE); + if (filter_avail < 0) + return; + else if (filter_avail) { + /* + * Set 'deflate' filter with reasonable + * compression level on DCPL + */ + H5Pset_deflate(dcpl_id, 6); + } + else { + /* + * Set Fletcher32 checksum filter on DCPL + * since it is always available in HDF5 + */ + H5Pset_fletcher32(dcpl_id); + } +} + +/* + * Routine to fill a data buffer with data. Assumes + * dimension rank is 2 and data is stored contiguous. + */ +void +fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data) +{ + C_DATATYPE *dataptr = data; + hsize_t i, j; + + /* Use MPI rank value for data */ + for (i = 0; i < count[0]; i++) { + for (j = 0; j < count[1]; j++) { + *dataptr++ = mpi_rank; + } + } +} + +/* Cleanup created file */ +static void +cleanup(char *filename) +{ + hbool_t do_cleanup = getenv(HDF5_NOCLEANUP) ? 0 : 1; + + if (do_cleanup) + MPI_File_delete(filename, MPI_INFO_NULL); +} + +/* + * Routine to write to a dataset in a fashion + * where no chunks in the dataset are written + * to by more than 1 MPI rank. This will + * generally give the best performance as the + * MPI ranks will need the least amount of + * inter-process communication. + */ +static void +write_dataset_no_overlap(hid_t file_id, hid_t dxpl_id) +{ + C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE]; + hsize_t dataset_dims[EXAMPLE_DSET_DIMS]; + hsize_t chunk_dims[EXAMPLE_DSET_DIMS]; + hsize_t start[EXAMPLE_DSET_DIMS]; + hsize_t stride[EXAMPLE_DSET_DIMS]; + hsize_t count[EXAMPLE_DSET_DIMS]; + size_t i, j; + hid_t dset_id = H5I_INVALID_HID; + hid_t dcpl_id = H5I_INVALID_HID; + hid_t file_dataspace = H5I_INVALID_HID; + + /* + * ------------------------------------ + * Setup Dataset Creation Property List + * ------------------------------------ + */ + + dcpl_id = H5Pcreate(H5P_DATASET_CREATE); + + /* + * REQUIRED: Dataset chunking must be enabled to + * apply a data filter to the dataset. + * Chunks in the dataset are of size + * EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE. + */ + chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims); + + /* Set filter to be applied to created datasets */ + set_filter(dcpl_id); + + /* + * ------------------------------------ + * Define the dimensions of the dataset + * and create it + * ------------------------------------ + */ + + /* + * Create a dataset composed of 4 chunks + * per MPI rank. The first dataset dimension + * scales according to the number of MPI ranks. + * The second dataset dimension stays fixed + * according to the chunk size. + */ + dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size; + dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE; + + file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL); + + /* Create the dataset */ + dset_id = H5Dcreate2(file_id, EXAMPLE_DSET1_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id, + H5P_DEFAULT); + + /* + * ------------------------------------ + * Setup selection in the dataset for + * each MPI rank + * ------------------------------------ + */ + + /* + * Each MPI rank's selection covers a + * single chunk in the first dataset + * dimension. Each MPI rank's selection + * covers 4 chunks in the second dataset + * dimension. This leads to each MPI rank + * writing to 4 chunks of the dataset. + */ + start[0] = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE; + start[1] = 0; + stride[0] = 1; + stride[1] = 1; + count[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + count[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE; + + H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL); + + /* + * -------------------------------------- + * Fill data buffer with MPI rank's rank + * value to make it easy to see which + * part of the dataset each rank wrote to + * -------------------------------------- + */ + + fill_databuf(start, count, stride, &data[0][0]); + + /* + * --------------------------------- + * Write to the dataset collectively + * --------------------------------- + */ + + H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data); + + /* + * -------------- + * Close HDF5 IDs + * -------------- + */ + + H5Sclose(file_dataspace); + H5Pclose(dcpl_id); + H5Dclose(dset_id); +} + +/* + * Routine to write to a dataset in a fashion + * where every chunk in the dataset is written + * to by every MPI rank. This will generally + * give the worst performance as the MPI ranks + * will need the most amount of inter-process + * communication. + */ +static void +write_dataset_overlap(hid_t file_id, hid_t dxpl_id) +{ + C_DATATYPE data[mpi_size][EXAMPLE_DSET_CHUNK_DIM_SIZE]; + hsize_t dataset_dims[EXAMPLE_DSET_DIMS]; + hsize_t chunk_dims[EXAMPLE_DSET_DIMS]; + hsize_t start[EXAMPLE_DSET_DIMS]; + hsize_t stride[EXAMPLE_DSET_DIMS]; + hsize_t count[EXAMPLE_DSET_DIMS]; + size_t i, j; + hid_t dset_id = H5I_INVALID_HID; + hid_t dcpl_id = H5I_INVALID_HID; + hid_t file_dataspace = H5I_INVALID_HID; + + /* + * ------------------------------------ + * Setup Dataset Creation Property List + * ------------------------------------ + */ + + dcpl_id = H5Pcreate(H5P_DATASET_CREATE); + + /* + * REQUIRED: Dataset chunking must be enabled to + * apply a data filter to the dataset. + * Chunks in the dataset are of size + * mpi_size x EXAMPLE_DSET_CHUNK_DIM_SIZE. + */ + chunk_dims[0] = mpi_size; + chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims); + + /* Set filter to be applied to created datasets */ + set_filter(dcpl_id); + + /* + * ------------------------------------ + * Define the dimensions of the dataset + * and create it + * ------------------------------------ + */ + + /* + * Create a dataset composed of N chunks, + * where N is the number of MPI ranks. The + * first dataset dimension scales according + * to the number of MPI ranks. The second + * dataset dimension stays fixed according + * to the chunk size. + */ + dataset_dims[0] = mpi_size * chunk_dims[0]; + dataset_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + + file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL); + + /* Create the dataset */ + dset_id = H5Dcreate2(file_id, EXAMPLE_DSET2_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id, + H5P_DEFAULT); + + /* + * ------------------------------------ + * Setup selection in the dataset for + * each MPI rank + * ------------------------------------ + */ + + /* + * Each MPI rank's selection covers + * part of every chunk in the first + * dimension. Each MPI rank's selection + * covers all of every chunk in the + * second dimension. This leads to + * each MPI rank writing an equal + * amount of data to every chunk + * in the dataset. + */ + start[0] = mpi_rank; + start[1] = 0; + stride[0] = chunk_dims[0]; + stride[1] = 1; + count[0] = mpi_size; + count[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + + H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL); + + /* + * -------------------------------------- + * Fill data buffer with MPI rank's rank + * value to make it easy to see which + * part of the dataset each rank wrote to + * -------------------------------------- + */ + + fill_databuf(start, count, stride, &data[0][0]); + + /* + * --------------------------------- + * Write to the dataset collectively + * --------------------------------- + */ + + H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data); + + /* + * -------------- + * Close HDF5 IDs + * -------------- + */ + + H5Sclose(file_dataspace); + H5Pclose(dcpl_id); + H5Dclose(dset_id); +} + +int +main(int argc, char **argv) +{ + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + hid_t file_id = H5I_INVALID_HID; + hid_t fapl_id = H5I_INVALID_HID; + hid_t dxpl_id = H5I_INVALID_HID; + char * par_prefix = NULL; + char filename[PATH_MAX]; + + MPI_Init(&argc, &argv); + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + /* + * ---------------------------------- + * Start parallel access to HDF5 file + * ---------------------------------- + */ + + /* Setup File Access Property List with parallel I/O access */ + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(fapl_id, comm, info); + + /* + * OPTIONAL: Set collective metadata reads on FAPL to allow + * parallel writes to filtered datasets to perform + * better at scale. While not strictly necessary, + * this is generally recommended. + */ + H5Pset_all_coll_metadata_ops(fapl_id, true); + + /* + * OPTIONAL: Set the latest file format version for HDF5 in + * order to gain access to different dataset chunk + * index types and better data encoding methods. + * While not strictly necessary, this is generally + * recommended. + */ + H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST); + + /* Parse any parallel prefix and create filename */ + par_prefix = getenv("HDF5_PARAPREFIX"); + + snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "", EXAMPLE_FILE); + + /* Create HDF5 file */ + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + + /* + * -------------------------------------- + * Setup Dataset Transfer Property List + * with collective I/O + * -------------------------------------- + */ + + dxpl_id = H5Pcreate(H5P_DATASET_XFER); + + /* + * REQUIRED: Setup collective I/O for the dataset + * write operations. Parallel writes to + * filtered datasets MUST be collective, + * even if some ranks have no data to + * contribute to the write operation. + * + * Refer to the 'ph5_filtered_writes_no_sel' + * example to see how to setup a dataset + * write when one or more MPI ranks have + * no data to contribute to the write + * operation. + */ + H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE); + + /* + * -------------------------------- + * Create and write to each dataset + * -------------------------------- + */ + + /* + * Write to a dataset in a fashion where no + * chunks in the dataset are written to by + * more than 1 MPI rank. This will generally + * give the best performance as the MPI ranks + * will need the least amount of inter-process + * communication. + */ + write_dataset_no_overlap(file_id, dxpl_id); + + /* + * Write to a dataset in a fashion where + * every chunk in the dataset is written + * to by every MPI rank. This will generally + * give the worst performance as the MPI ranks + * will need the most amount of inter-process + * communication. + */ + write_dataset_overlap(file_id, dxpl_id); + + /* + * ------------------ + * Close all HDF5 IDs + * ------------------ + */ + + H5Pclose(dxpl_id); + H5Pclose(fapl_id); + H5Fclose(file_id); + + printf("PHDF5 example finished with no errors\n"); + + /* + * ------------------------------------ + * Cleanup created HDF5 file and finish + * ------------------------------------ + */ + + cleanup(filename); + + MPI_Finalize(); + + return 0; +} + +#else + +int +main(void) +{ + printf("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n"); + return 0; +} + +#endif diff --git a/examples/ph5_filtered_writes_no_sel.c b/examples/ph5_filtered_writes_no_sel.c new file mode 100644 index 0000000..14c68c8 --- /dev/null +++ b/examples/ph5_filtered_writes_no_sel.c @@ -0,0 +1,366 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Example of using the parallel HDF5 library to collectively write to + * datasets with filters applied to them when one or MPI ranks do not + * have data to contribute to the dataset. + * + * If the HDF5_NOCLEANUP environment variable is set, the file that + * this example creates will not be removed as the example finishes. + * + * The need of requirement of parallel file prefix is that in general + * the current working directory in which compiling is done, is not suitable + * for parallel I/O and there is no standard pathname for parallel file + * systems. In some cases, the parallel file name may even need some + * parallel file type prefix such as: "pfs:/GF/...". Therefore, this + * example parses the HDF5_PARAPREFIX environment variable for a prefix, + * if one is needed. + */ + +#include <stdlib.h> + +#include "hdf5.h" + +#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES) + +#define EXAMPLE_FILE "ph5_filtered_writes_no_sel.h5" +#define EXAMPLE_DSET_NAME "DSET" + +#define EXAMPLE_DSET_DIMS 2 +#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10 + +/* Dataset datatype */ +#define HDF5_DATATYPE H5T_NATIVE_INT +typedef int C_DATATYPE; + +/* Global variables */ +int mpi_rank, mpi_size; + +/* + * Routine to set an HDF5 filter on the given DCPL + */ +static void +set_filter(hid_t dcpl_id) +{ + htri_t filter_avail; + + /* + * Check if 'deflate' filter is available + */ + filter_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE); + if (filter_avail < 0) + return; + else if (filter_avail) { + /* + * Set 'deflate' filter with reasonable + * compression level on DCPL + */ + H5Pset_deflate(dcpl_id, 6); + } + else { + /* + * Set Fletcher32 checksum filter on DCPL + * since it is always available in HDF5 + */ + H5Pset_fletcher32(dcpl_id); + } +} + +/* + * Routine to fill a data buffer with data. Assumes + * dimension rank is 2 and data is stored contiguous. + */ +void +fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data) +{ + C_DATATYPE *dataptr = data; + hsize_t i, j; + + /* Use MPI rank value for data */ + for (i = 0; i < count[0]; i++) { + for (j = 0; j < count[1]; j++) { + *dataptr++ = mpi_rank; + } + } +} + +/* Cleanup created file */ +static void +cleanup(char *filename) +{ + hbool_t do_cleanup = getenv(HDF5_NOCLEANUP) ? 0 : 1; + + if (do_cleanup) + MPI_File_delete(filename, MPI_INFO_NULL); +} + +/* + * Routine to write to a dataset in a fashion + * where no chunks in the dataset are written + * to by more than 1 MPI rank. This will + * generally give the best performance as the + * MPI ranks will need the least amount of + * inter-process communication. + */ +static void +write_dataset_some_no_sel(hid_t file_id, hid_t dxpl_id) +{ + C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE]; + hsize_t dataset_dims[EXAMPLE_DSET_DIMS]; + hsize_t chunk_dims[EXAMPLE_DSET_DIMS]; + hsize_t start[EXAMPLE_DSET_DIMS]; + hsize_t stride[EXAMPLE_DSET_DIMS]; + hsize_t count[EXAMPLE_DSET_DIMS]; + hbool_t no_selection; + size_t i, j; + hid_t dset_id = H5I_INVALID_HID; + hid_t dcpl_id = H5I_INVALID_HID; + hid_t file_dataspace = H5I_INVALID_HID; + + /* + * ------------------------------------ + * Setup Dataset Creation Property List + * ------------------------------------ + */ + + dcpl_id = H5Pcreate(H5P_DATASET_CREATE); + + /* + * REQUIRED: Dataset chunking must be enabled to + * apply a data filter to the dataset. + * Chunks in the dataset are of size + * EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE. + */ + chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims); + + /* Set filter to be applied to created datasets */ + set_filter(dcpl_id); + + /* + * ------------------------------------ + * Define the dimensions of the dataset + * and create it + * ------------------------------------ + */ + + /* + * Create a dataset composed of 4 chunks + * per MPI rank. The first dataset dimension + * scales according to the number of MPI ranks. + * The second dataset dimension stays fixed + * according to the chunk size. + */ + dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size; + dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE; + + file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL); + + /* Create the dataset */ + dset_id = H5Dcreate2(file_id, EXAMPLE_DSET_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id, + H5P_DEFAULT); + + /* + * ------------------------------------ + * Setup selection in the dataset for + * each MPI rank + * ------------------------------------ + */ + + /* + * Odd rank value MPI ranks do not + * contribute any data to the dataset. + */ + no_selection = (mpi_rank % 2) == 1; + + if (no_selection) { + /* + * MPI ranks not contributing data to + * the dataset should call H5Sselect_none + * on the file dataspace that will be + * passed to H5Dwrite. + */ + H5Sselect_none(file_dataspace); + } + else { + /* + * Even MPI ranks contribute data to + * the dataset. Each MPI rank's selection + * covers a single chunk in the first dataset + * dimension. Each MPI rank's selection + * covers 4 chunks in the second dataset + * dimension. This leads to each contributing + * MPI rank writing to 4 chunks of the dataset. + */ + start[0] = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE; + start[1] = 0; + stride[0] = 1; + stride[1] = 1; + count[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE; + count[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE; + + H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL); + + /* + * -------------------------------------- + * Fill data buffer with MPI rank's rank + * value to make it easy to see which + * part of the dataset each rank wrote to + * -------------------------------------- + */ + + fill_databuf(start, count, stride, &data[0][0]); + } + + /* + * --------------------------------- + * Write to the dataset collectively + * --------------------------------- + */ + + H5Dwrite(dset_id, HDF5_DATATYPE, no_selection ? H5S_ALL : H5S_BLOCK, file_dataspace, dxpl_id, data); + + /* + * -------------- + * Close HDF5 IDs + * -------------- + */ + + H5Sclose(file_dataspace); + H5Pclose(dcpl_id); + H5Dclose(dset_id); +} + +int +main(int argc, char **argv) +{ + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + hid_t file_id = H5I_INVALID_HID; + hid_t fapl_id = H5I_INVALID_HID; + hid_t dxpl_id = H5I_INVALID_HID; + char * par_prefix = NULL; + char filename[PATH_MAX]; + + MPI_Init(&argc, &argv); + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + /* + * ---------------------------------- + * Start parallel access to HDF5 file + * ---------------------------------- + */ + + /* Setup File Access Property List with parallel I/O access */ + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + H5Pset_fapl_mpio(fapl_id, comm, info); + + /* + * OPTIONAL: Set collective metadata reads on FAPL to allow + * parallel writes to filtered datasets to perform + * better at scale. While not strictly necessary, + * this is generally recommended. + */ + H5Pset_all_coll_metadata_ops(fapl_id, true); + + /* + * OPTIONAL: Set the latest file format version for HDF5 in + * order to gain access to different dataset chunk + * index types and better data encoding methods. + * While not strictly necessary, this is generally + * recommended. + */ + H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST); + + /* Parse any parallel prefix and create filename */ + par_prefix = getenv("HDF5_PARAPREFIX"); + + snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "", EXAMPLE_FILE); + + /* Create HDF5 file */ + file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id); + + /* + * -------------------------------------- + * Setup Dataset Transfer Property List + * with collective I/O + * -------------------------------------- + */ + + dxpl_id = H5Pcreate(H5P_DATASET_XFER); + + /* + * REQUIRED: Setup collective I/O for the dataset + * write operations. Parallel writes to + * filtered datasets MUST be collective, + * even if some ranks have no data to + * contribute to the write operation. + */ + H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE); + + /* + * -------------------------------- + * Create and write to the dataset + * -------------------------------- + */ + + /* + * Write to a dataset in a fashion where no + * chunks in the dataset are written to by + * more than 1 MPI rank and some MPI ranks + * have nothing to contribute to the dataset. + * In this case, the MPI ranks that have no + * data to contribute must still participate + * in the collective H5Dwrite call, but should + * call H5Sselect_none on the file dataspace + * passed to the H5Dwrite call. + */ + write_dataset_some_no_sel(file_id, dxpl_id); + + /* + * ------------------ + * Close all HDF5 IDs + * ------------------ + */ + + H5Pclose(dxpl_id); + H5Pclose(fapl_id); + H5Fclose(file_id); + + printf("PHDF5 example finished with no errors\n"); + + /* + * ------------------------------------ + * Cleanup created HDF5 file and finish + * ------------------------------------ + */ + + cleanup(filename); + + MPI_Finalize(); + + return 0; +} + +#else + +int +main(void) +{ + printf("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n"); + return 0; +} + +#endif diff --git a/examples/ph5example.c b/examples/ph5example.c index 23af477..36fbfd5 100644 --- a/examples/ph5example.c +++ b/examples/ph5example.c @@ -1073,11 +1073,11 @@ main(int argc, char **argv) finish: if (mpi_rank == 0) { /* only process 0 reports */ if (nerrors) - printf("***PHDF5 tests detected %d errors***\n", nerrors); + printf("***PHDF5 example detected %d errors***\n", nerrors); else { - printf("===================================\n"); - printf("PHDF5 tests finished with no errors\n"); - printf("===================================\n"); + printf("=====================================\n"); + printf("PHDF5 example finished with no errors\n"); + printf("=====================================\n"); } } if (docleanup) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 10663f2..a165433 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -883,8 +883,46 @@ New Features Parallel Library: ----------------- - - + - Several improvements to parallel compression feature, including: + + * Improved support for collective I/O (for both writes and reads) + + * Significant reduction of memory usage for the feature as a whole + + * Reduction of copying of application data buffers passed to H5Dwrite + + * Addition of support for incremental file space allocation for filtered + datasets created in parallel. Incremental file space allocation is the + default for these types of datasets (early file space allocation is + also still supported), while early file space allocation is still the + default (and only supported allocation time) for unfiltered datasets + created in parallel. Incremental file space allocation should help with + parallel HDF5 applications that wish to use fill values on filtered + datasets, but would typically avoid doing so since dataset creation in + parallel would often take an excessive amount of time. Since these + datasets previously used early file space allocation, HDF5 would + allocate space for and write fill values to every chunk in the dataset + at creation time, leading to noticeable overhead. Instead, with + incremental file space allocation, allocation of file space for chunks + and writing of fill values to those chunks will be delayed until each + individual chunk is initially written to. + + * Addition of support for HDF5's "don't filter partial edge chunks" flag + (https://portal.hdfgroup.org/display/HDF5/H5P_SET_CHUNK_OPTS) + + * Addition of proper support for HDF5 fill values with the feature + + * Addition of 'H5_HAVE_PARALLEL_FILTERED_WRITES' macro to H5pubconf.h + so HDF5 applications can determine at compile-time whether the feature + is available + + * Addition of simple examples (ph5_filtered_writes.c and + ph5_filtered_writes_no_sel.c) under examples directory to demonstrate + usage of the feature + + * Improved coverage of regression testing for the feature + (JTH - 2022/2/23) Fortran Library: ---------------- diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 1b0e579..e4d8706 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -239,10 +239,14 @@ typedef struct H5D_chunk_file_iter_ud_t { #ifdef H5_HAVE_PARALLEL /* information to construct a collective I/O operation for filling chunks */ -typedef struct H5D_chunk_coll_info_t { - size_t num_io; /* Number of write operations */ - haddr_t *addr; /* array of the file addresses of the write operation */ -} H5D_chunk_coll_info_t; +typedef struct H5D_chunk_coll_fill_info_t { + size_t num_chunks; /* Number of chunks in the write operation */ + struct chunk_coll_fill_info { + haddr_t addr; /* File address of the chunk */ + size_t chunk_size; /* Size of the chunk in the file */ + hbool_t unfiltered_partial_chunk; + } * chunk_info; +} H5D_chunk_coll_fill_info_t; #endif /* H5_HAVE_PARALLEL */ typedef struct H5D_chunk_iter_ud_t { @@ -287,9 +291,6 @@ static int H5D__chunk_format_convert_cb(const H5D_chunk_rec_t *chunk_rec, void * /* Helper routines */ static herr_t H5D__chunk_set_info_real(H5O_layout_chunk_t *layout, unsigned ndims, const hsize_t *curr_dims, const hsize_t *max_dims); -static void * H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline); -static void * H5D__chunk_mem_xfree(void *chk, const void *pline); -static void * H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline); static herr_t H5D__chunk_cinfo_cache_reset(H5D_chunk_cached_t *last); static herr_t H5D__chunk_cinfo_cache_update(H5D_chunk_cached_t *last, const H5D_chunk_ud_t *udata); static hbool_t H5D__chunk_cinfo_cache_found(const H5D_chunk_cached_t *last, H5D_chunk_ud_t *udata); @@ -306,8 +307,6 @@ static herr_t H5D__chunk_mem_cb(void *elem, const H5T_t *type, unsigned ndims, static unsigned H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled); static herr_t H5D__chunk_flush_entry(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t reset); static herr_t H5D__chunk_cache_evict(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t flush); -static hbool_t H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims, - const hsize_t *chunk_scaled, const hsize_t *dset_dims); static void * H5D__chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, hbool_t relax, hbool_t prev_unfilt_chunk); static herr_t H5D__chunk_unlock(const H5D_io_info_t *io_info, const H5D_chunk_ud_t *udata, hbool_t dirty, @@ -315,9 +314,9 @@ static herr_t H5D__chunk_unlock(const H5D_io_info_t *io_info, const H5D_chunk_ static herr_t H5D__chunk_cache_prune(const H5D_t *dset, size_t size); static herr_t H5D__chunk_prune_fill(H5D_chunk_it_ud1_t *udata, hbool_t new_unfilt_chunk); #ifdef H5_HAVE_PARALLEL -static herr_t H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info, - size_t chunk_size, const void *fill_buf); -static int H5D__chunk_cmp_addr(const void *addr1, const void *addr2); +static herr_t H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_fill_info_t *chunk_fill_info, + const void *fill_buf, const void *partial_chunk_fill_buf); +static int H5D__chunk_cmp_coll_fill_info(const void *_entry1, const void *_entry2); #endif /* H5_HAVE_PARALLEL */ /* Debugging helper routine callback */ @@ -1362,7 +1361,7 @@ done: * *------------------------------------------------------------------------- */ -static void * +void * H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline) { void *ret_value = NULL; /* Return value */ @@ -1393,7 +1392,7 @@ H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline) * *------------------------------------------------------------------------- */ -static void * +void * H5D__chunk_mem_xfree(void *chk, const void *_pline) { const H5O_pline_t *pline = (const H5O_pline_t *)_pline; @@ -1417,7 +1416,7 @@ H5D__chunk_mem_xfree(void *chk, const void *_pline) * calls H5D__chunk_mem_xfree and discards the return value. *------------------------------------------------------------------------- */ -static void +void H5D__chunk_mem_free(void *chk, const void *_pline) { (void)H5D__chunk_mem_xfree(chk, _pline); @@ -1437,7 +1436,7 @@ H5D__chunk_mem_free(void *chk, const void *_pline) * *------------------------------------------------------------------------- */ -static void * +void * H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline) { void *ret_value = NULL; /* Return value */ @@ -4320,8 +4319,8 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const hbool_t blocks_written = FALSE; /* Flag to indicate that chunk was actually written */ hbool_t using_mpi = FALSE; /* Flag to indicate that the file is being accessed with an MPI-capable file driver */ - H5D_chunk_coll_info_t chunk_info; /* chunk address information for doing I/O */ -#endif /* H5_HAVE_PARALLEL */ + H5D_chunk_coll_fill_info_t chunk_fill_info; /* chunk address information for doing I/O */ +#endif /* H5_HAVE_PARALLEL */ hbool_t carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */ unsigned space_ndims; /* Dataset's space rank */ const hsize_t * space_dim; /* Dataset's dataspace dimensions */ @@ -4368,8 +4367,8 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const using_mpi = TRUE; /* init chunk info stuff for collective I/O */ - chunk_info.num_io = 0; - chunk_info.addr = NULL; + chunk_fill_info.num_chunks = 0; + chunk_fill_info.chunk_info = NULL; } /* end if */ #endif /* H5_HAVE_PARALLEL */ @@ -4641,19 +4640,26 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const if (using_mpi) { /* collect all chunk addresses to be written to write collectively at the end */ - /* allocate/resize address array if no more space left */ - /* Note that if we add support for parallel filters we must - * also store an array of chunk sizes and pass it to the - * apporpriate collective write function */ - if (0 == chunk_info.num_io % 1024) - if (NULL == (chunk_info.addr = (haddr_t *)H5MM_realloc( - chunk_info.addr, (chunk_info.num_io + 1024) * sizeof(haddr_t)))) + + /* allocate/resize chunk info array if no more space left */ + if (0 == chunk_fill_info.num_chunks % 1024) { + void *tmp_realloc; + + if (NULL == (tmp_realloc = H5MM_realloc(chunk_fill_info.chunk_info, + (chunk_fill_info.num_chunks + 1024) * + sizeof(struct chunk_coll_fill_info)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, - "memory allocation failed for chunk addresses") + "memory allocation failed for chunk fill info") + + chunk_fill_info.chunk_info = tmp_realloc; + } - /* Store the chunk's address for later */ - chunk_info.addr[chunk_info.num_io] = udata.chunk_block.offset; - chunk_info.num_io++; + /* Store info about the chunk for later */ + chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].addr = udata.chunk_block.offset; + chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].chunk_size = chunk_size; + chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].unfiltered_partial_chunk = + (*fill_buf == unfilt_fill_buf); + chunk_fill_info.num_chunks++; /* Indicate that blocks will be written */ blocks_written = TRUE; @@ -4726,7 +4732,7 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const #ifdef H5_HAVE_PARALLEL /* do final collective I/O */ if (using_mpi && blocks_written) - if (H5D__chunk_collective_fill(dset, &chunk_info, chunk_size, fb_info.fill_buf) < 0) + if (H5D__chunk_collective_fill(dset, &chunk_fill_info, fb_info.fill_buf, unfilt_fill_buf) < 0) HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file") #endif /* H5_HAVE_PARALLEL */ @@ -4742,8 +4748,8 @@ done: unfilt_fill_buf = H5D__chunk_mem_xfree(unfilt_fill_buf, &def_pline); #ifdef H5_HAVE_PARALLEL - if (using_mpi && chunk_info.addr) - H5MM_free(chunk_info.addr); + if (using_mpi && chunk_fill_info.chunk_info) + H5MM_free(chunk_fill_info.chunk_info); #endif FUNC_LEAVE_NOAPI(ret_value) @@ -4937,27 +4943,35 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info, size_t chunk_size, - const void *fill_buf) +H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_fill_info_t *chunk_fill_info, + const void *fill_buf, const void *partial_chunk_fill_buf) { - MPI_Comm mpi_comm = MPI_COMM_NULL; /* MPI communicator for file */ - int mpi_rank = (-1); /* This process's rank */ - int mpi_size = (-1); /* MPI Comm size */ - int mpi_code; /* MPI return code */ - size_t num_blocks; /* Number of blocks between processes. */ - size_t leftover_blocks; /* Number of leftover blocks to handle */ - int blocks, leftover, block_len; /* converted to int for MPI */ + MPI_Comm mpi_comm = MPI_COMM_NULL; /* MPI communicator for file */ + int mpi_rank = (-1); /* This process's rank */ + int mpi_size = (-1); /* MPI Comm size */ + int mpi_code; /* MPI return code */ + size_t num_blocks; /* Number of blocks between processes. */ + size_t leftover_blocks; /* Number of leftover blocks to handle */ + int blocks, leftover; /* converted to int for MPI */ MPI_Aint * chunk_disp_array = NULL; + MPI_Aint * block_disps = NULL; int * block_lens = NULL; MPI_Datatype mem_type = MPI_BYTE, file_type = MPI_BYTE; H5FD_mpio_xfer_t prev_xfer_mode; /* Previous data xfer mode */ hbool_t have_xfer_mode = FALSE; /* Whether the previous xffer mode has been retrieved */ - hbool_t need_addr_sort = FALSE; - int i; /* Local index variable */ + hbool_t need_sort = FALSE; + size_t i; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC + /* + * If a separate fill buffer is provided for partial chunks, ensure + * that the "don't filter partial edge chunks" flag is set. + */ + if (partial_chunk_fill_buf) + HDassert(dset->shared->layout.u.chunk.flags & H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS); + /* Get the MPI communicator */ if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(dset->oloc.file))) HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator") @@ -4973,39 +4987,89 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info, /* Distribute evenly the number of blocks between processes. */ if (mpi_size == 0) HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "Resulted in division by zero") - num_blocks = (size_t)(chunk_info->num_io / (size_t)mpi_size); /* value should be the same on all procs */ + num_blocks = + (size_t)(chunk_fill_info->num_chunks / (size_t)mpi_size); /* value should be the same on all procs */ /* After evenly distributing the blocks between processes, are there any * leftover blocks for each individual process (round-robin)? */ - leftover_blocks = (size_t)(chunk_info->num_io % (size_t)mpi_size); + leftover_blocks = (size_t)(chunk_fill_info->num_chunks % (size_t)mpi_size); /* Cast values to types needed by MPI */ H5_CHECKED_ASSIGN(blocks, int, num_blocks, size_t); H5_CHECKED_ASSIGN(leftover, int, leftover_blocks, size_t); - H5_CHECKED_ASSIGN(block_len, int, chunk_size, size_t); /* Check if we have any chunks to write on this rank */ if (num_blocks > 0 || (leftover && leftover > mpi_rank)) { + MPI_Aint partial_fill_buf_disp = 0; + hbool_t all_same_block_len = TRUE; + /* Allocate buffers */ - /* (MSC - should not need block_lens if MPI_type_create_hindexed_block is working) */ - if (NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer") if (NULL == (chunk_disp_array = (MPI_Aint *)H5MM_malloc((size_t)(blocks + 1) * sizeof(MPI_Aint)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file displacement buffer") - for (i = 0; i < blocks; i++) { - /* store the chunk address as an MPI_Aint */ - chunk_disp_array[i] = (MPI_Aint)(chunk_info->addr[i + (mpi_rank * blocks)]); + if (partial_chunk_fill_buf) { + MPI_Aint fill_buf_addr; + MPI_Aint partial_fill_buf_addr; + + /* Calculate the displacement between the fill buffer and partial chunk fill buffer */ + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(fill_buf, &fill_buf_addr))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code) + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(partial_chunk_fill_buf, &partial_fill_buf_addr))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code) - /* MSC - should not need this if MPI_type_create_hindexed_block is working */ - block_lens[i] = block_len; +#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1 + partial_fill_buf_disp = MPI_Aint_diff(partial_fill_buf_addr, fill_buf_addr); +#else + partial_fill_buf_disp = partial_fill_buf_addr - fill_buf_addr; +#endif - /* Make sure that the addresses in the datatype are - * monotonically non-decreasing + /* + * Allocate all-zero block displacements array. If a block's displacement + * is left as zero, that block will be written to from the regular fill + * buffer. If a block represents an unfiltered partial edge chunk, its + * displacement will be set so that the block is written to from the + * unfiltered fill buffer. */ - if (i && (chunk_disp_array[i] < chunk_disp_array[i - 1])) - need_addr_sort = TRUE; + if (NULL == (block_disps = (MPI_Aint *)H5MM_calloc((size_t)(blocks + 1) * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate block displacements buffer") + } + + /* + * Perform initial scan of chunk info list to: + * - make sure that chunk addresses are monotonically non-decreasing + * - check if all blocks have the same length + */ + for (i = 1; i < chunk_fill_info->num_chunks; i++) { + if (chunk_fill_info->chunk_info[i].addr < chunk_fill_info->chunk_info[i - 1].addr) + need_sort = TRUE; + + if (chunk_fill_info->chunk_info[i].chunk_size != chunk_fill_info->chunk_info[i - 1].chunk_size) + all_same_block_len = FALSE; + } + + if (need_sort) + HDqsort(chunk_fill_info->chunk_info, chunk_fill_info->num_chunks, + sizeof(struct chunk_coll_fill_info), H5D__chunk_cmp_coll_fill_info); + + /* Allocate buffer for block lengths if necessary */ + if (!all_same_block_len) + if (NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer") + + for (i = 0; i < (size_t)blocks; i++) { + size_t idx = i + (size_t)(mpi_rank * blocks); + + /* store the chunk address as an MPI_Aint */ + chunk_disp_array[i] = (MPI_Aint)(chunk_fill_info->chunk_info[idx].addr); + + if (!all_same_block_len) + H5_CHECKED_ASSIGN(block_lens[i], int, chunk_fill_info->chunk_info[idx].chunk_size, size_t); + + if (chunk_fill_info->chunk_info[idx].unfiltered_partial_chunk) { + HDassert(partial_chunk_fill_buf); + block_disps[i] = partial_fill_buf_disp; + } } /* end for */ /* Calculate if there are any leftover blocks after evenly @@ -5013,32 +5077,71 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info, * to processes 0 -> leftover. */ if (leftover && leftover > mpi_rank) { - chunk_disp_array[blocks] = (MPI_Aint)chunk_info->addr[(blocks * mpi_size) + mpi_rank]; - if (blocks && (chunk_disp_array[blocks] < chunk_disp_array[blocks - 1])) - need_addr_sort = TRUE; - block_lens[blocks] = block_len; + chunk_disp_array[blocks] = + (MPI_Aint)chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].addr; + + if (!all_same_block_len) + H5_CHECKED_ASSIGN(block_lens[blocks], int, + chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].chunk_size, + size_t); + + if (chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].unfiltered_partial_chunk) { + HDassert(partial_chunk_fill_buf); + block_disps[blocks] = partial_fill_buf_disp; + } + blocks++; } - /* Ensure that the blocks are sorted in monotonically non-decreasing - * order of offset in the file. - */ - if (need_addr_sort) - HDqsort(chunk_disp_array, (size_t)blocks, sizeof(MPI_Aint), H5D__chunk_cmp_addr); + /* Create file and memory types for the write operation */ + if (all_same_block_len) { + int block_len; + + H5_CHECKED_ASSIGN(block_len, int, chunk_fill_info->chunk_info[0].chunk_size, size_t); + + mpi_code = + MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE, &file_type); + if (mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code) + + if (partial_chunk_fill_buf) { + /* + * If filters are disabled for partial edge chunks, those chunks could + * potentially have the same block length as the other chunks, but still + * need to be written to using the unfiltered fill buffer. Use an hindexed + * block type rather than an hvector. + */ + mpi_code = + MPI_Type_create_hindexed_block(blocks, block_len, block_disps, MPI_BYTE, &mem_type); + if (mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code) + } + else { + mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type); + if (mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) + } + } + else { + /* + * Currently, different block lengths implies that there are partial + * edge chunks and the "don't filter partial edge chunks" flag is set. + */ + HDassert(partial_chunk_fill_buf); + HDassert(block_lens); + HDassert(block_disps); + + mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type); + if (mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + + mpi_code = MPI_Type_create_hindexed(blocks, block_lens, block_disps, MPI_BYTE, &mem_type); + if (mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + } - /* MSC - should use this if MPI_type_create_hindexed block is working: - * mpi_code = MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE, - * &file_type); - */ - mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type); - if (mpi_code != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - - mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type); - if (mpi_code != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) } /* end if */ @@ -5081,39 +5184,25 @@ done: if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) H5MM_xfree(chunk_disp_array); + H5MM_xfree(block_disps); H5MM_xfree(block_lens); FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__chunk_collective_fill() */ static int -H5D__chunk_cmp_addr(const void *addr1, const void *addr2) +H5D__chunk_cmp_coll_fill_info(const void *_entry1, const void *_entry2) { - MPI_Aint _addr1 = (MPI_Aint)0, _addr2 = (MPI_Aint)0; - int ret_value = 0; + const struct chunk_coll_fill_info *entry1; + const struct chunk_coll_fill_info *entry2; FUNC_ENTER_STATIC_NOERR - _addr1 = *((const MPI_Aint *)addr1); - _addr2 = *((const MPI_Aint *)addr2); + entry1 = (const struct chunk_coll_fill_info *)_entry1; + entry2 = (const struct chunk_coll_fill_info *)_entry2; -#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1 - { - MPI_Aint diff = MPI_Aint_diff(_addr1, _addr2); - - if (diff < (MPI_Aint)0) - ret_value = -1; - else if (diff > (MPI_Aint)0) - ret_value = 1; - else - ret_value = 0; - } -#else - ret_value = (_addr1 > _addr2) - (_addr1 < _addr2); -#endif - - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__chunk_cmp_addr() */ + FUNC_LEAVE_NOAPI(H5F_addr_cmp(entry1->addr, entry2->addr)) +} /* end H5D__chunk_cmp_coll_fill_info() */ #endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- @@ -6827,7 +6916,7 @@ done: * *------------------------------------------------------------------------- */ -static hbool_t +hbool_t H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims, const hsize_t scaled[], const hsize_t *dset_dims) { @@ -7122,6 +7211,89 @@ done: } /* end H5D__chunk_format_convert() */ /*------------------------------------------------------------------------- + * Function: H5D__chunk_index_empty_cb + * + * Purpose: Callback function that simply stops iteration and sets the + * `empty` parameter to FALSE if called. If this callback is + * entered, it means that the chunk index contains at least + * one chunk, so is not empty. + * + * Return: H5_ITER_STOP + * + *------------------------------------------------------------------------- + */ +static int +H5D__chunk_index_empty_cb(const H5D_chunk_rec_t H5_ATTR_UNUSED *chunk_rec, void *_udata) +{ + hbool_t *empty = (hbool_t *)_udata; + int ret_value = H5_ITER_STOP; + + FUNC_ENTER_STATIC_NOERR + + *empty = FALSE; + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__chunk_index_empty_cb() */ + +/*------------------------------------------------------------------------- + * Function: H5D__chunk_index_empty + * + * Purpose: Determines whether a chunk index is empty (has no chunks + * inserted into it yet). + * + * Note: This routine is meant to be a little more performant than + * just counting the number of chunks in the index. In the + * future, this is probably a callback that the chunk index + * ops structure should provide. + * + * Return: Non-negative on Success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__chunk_index_empty(const H5D_t *dset, hbool_t *empty) +{ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + H5D_rdcc_ent_t * ent; /* Cache entry */ + const H5D_rdcc_t * rdcc = NULL; /* Raw data chunk cache */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr) + + HDassert(dset); + HDassert(dset->shared); + HDassert(empty); + + rdcc = &(dset->shared->cache.chunk); /* raw data chunk cache */ + HDassert(rdcc); + + /* Search for cached chunks that haven't been written out */ + for (ent = rdcc->head; ent; ent = ent->next) + /* Flush the chunk out to disk, to make certain the size is correct later */ + if (H5D__chunk_flush_entry(dset, ent, FALSE) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer") + + /* Compose chunked index info struct */ + idx_info.f = dset->oloc.file; + idx_info.pline = &dset->shared->dcpl_cache.pline; + idx_info.layout = &dset->shared->layout.u.chunk; + idx_info.storage = &dset->shared->layout.storage.u.chunk; + + *empty = TRUE; + + if (H5F_addr_defined(idx_info.storage->idx_addr)) { + /* Iterate over the allocated chunks */ + if ((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__chunk_index_empty_cb, empty) < + 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "unable to retrieve allocated chunk information from index") + } + +done: + FUNC_LEAVE_NOAPI_TAG(ret_value) +} /* end H5D__chunk_index_empty() */ + +/*------------------------------------------------------------------------- * Function: H5D__get_num_chunks_cb * * Purpose: Callback function that increments the number of written diff --git a/src/H5Dint.c b/src/H5Dint.c index c9ea6bd..cc17265 100644 --- a/src/H5Dint.c +++ b/src/H5Dint.c @@ -378,40 +378,18 @@ H5D__get_space_status(const H5D_t *dset, H5D_space_status_t *allocation) /* Check for chunked layout */ if (dset->shared->layout.type == H5D_CHUNKED) { - hsize_t space_allocated; /* The number of bytes allocated for chunks */ - hssize_t snelmts; /* Temporary holder for number of elements in dataspace */ - hsize_t nelmts; /* Number of elements in dataspace */ - size_t dt_size; /* Size of datatype */ - hsize_t full_size; /* The number of bytes in the dataset when fully populated */ - - /* For chunked layout set the space status by the storage size */ - /* Get the dataset's dataspace */ - HDassert(dset->shared->space); - - /* Get the total number of elements in dataset's dataspace */ - if ((snelmts = H5S_GET_EXTENT_NPOINTS(dset->shared->space)) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve number of elements in dataspace") - nelmts = (hsize_t)snelmts; - - /* Get the size of the dataset's datatype */ - if (0 == (dt_size = H5T_GET_SIZE(dset->shared->type))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve size of datatype") - - /* Compute the maximum size of the dataset in bytes */ - full_size = nelmts * dt_size; - - /* Check for overflow during multiplication */ - if (nelmts != (full_size / dt_size)) - HGOTO_ERROR(H5E_DATASET, H5E_OVERFLOW, FAIL, "size of dataset's storage overflowed") - - /* Difficult to error check, since the error value is 0 and 0 is a valid value... :-/ */ - if (H5D__get_storage_size(dset, &space_allocated) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get size of dataset's storage") - - /* Decide on how much of the space is allocated */ - if (space_allocated == 0) + hsize_t n_chunks_total = dset->shared->layout.u.chunk.nchunks; + hsize_t n_chunks_alloc = 0; + + if (H5D__get_num_chunks(dset, dset->shared->space, &n_chunks_alloc) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "unable to retrieve number of allocated chunks in dataset") + + HDassert(n_chunks_alloc <= n_chunks_total); + + if (n_chunks_alloc == 0) *allocation = H5D_SPACE_STATUS_NOT_ALLOCATED; - else if (space_allocated == full_size) + else if (n_chunks_alloc == n_chunks_total) *allocation = H5D_SPACE_STATUS_ALLOCATED; else *allocation = H5D_SPACE_STATUS_PART_ALLOCATED; @@ -1301,10 +1279,19 @@ H5D__create(H5F_t *file, hid_t type_id, const H5S_t *space, hid_t dcpl_id, hid_t HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, NULL, "can't set latest indexing") } /* end if */ - /* Check if this dataset is going into a parallel file and set space allocation time */ + /* Check if the file driver would like to force early space allocation */ if (H5F_HAS_FEATURE(file, H5FD_FEAT_ALLOCATE_EARLY)) new_dset->shared->dcpl_cache.fill.alloc_time = H5D_ALLOC_TIME_EARLY; + /* + * Check if this dataset is going into a parallel file and set space allocation time. + * If the dataset has filters applied to it, writes to the dataset must be collective, + * so we don't need to force early space allocation. Otherwise, we force early space + * allocation to facilitate independent raw data operations. + */ + if (H5F_HAS_FEATURE(file, H5FD_FEAT_HAS_MPI) && (new_dset->shared->dcpl_cache.pline.nused == 0)) + new_dset->shared->dcpl_cache.fill.alloc_time = H5D_ALLOC_TIME_EARLY; + /* Set the dataset's I/O operations */ if (H5D__layout_set_io_ops(new_dset) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "unable to initialize I/O operations") diff --git a/src/H5Dio.c b/src/H5Dio.c index 1ea3f07..e226a0a 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -300,6 +300,7 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, H5S_t *mem_space, H5S_t *file_spac H5D_io_info_t io_info; /* Dataset I/O info */ H5D_type_info_t type_info; /* Datatype info for operation */ hbool_t type_info_init = FALSE; /* Whether the datatype info has been initialized */ + hbool_t should_alloc_space = FALSE; /* Whether or not to initialize dataset's storage */ H5S_t * projected_mem_space = NULL; /* If not NULL, ptr to dataspace containing a */ /* projection of the supplied mem_space to a new */ /* dataspace with rank equal to that of */ @@ -432,8 +433,20 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, H5S_t *mem_space, H5S_t *file_spac HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set up I/O operation") /* Allocate dataspace and initialize it if it hasn't been. */ - if (nelmts > 0 && dataset->shared->dcpl_cache.efl.nused == 0 && - !(*dataset->shared->layout.ops->is_space_alloc)(&dataset->shared->layout.storage)) { + should_alloc_space = dataset->shared->dcpl_cache.efl.nused == 0 && + !(*dataset->shared->layout.ops->is_space_alloc)(&dataset->shared->layout.storage); + + /* + * If not using an MPI-based VFD, we only need to allocate + * and initialize storage if there's a selection in the + * dataset's dataspace. Otherwise, we always need to participate + * in the storage allocation since this may use collective + * operations and we will hang if we don't participate. + */ + if (!H5F_HAS_FEATURE(dataset->oloc.file, H5FD_FEAT_HAS_MPI)) + should_alloc_space = should_alloc_space && (nelmts > 0); + + if (should_alloc_space) { hssize_t file_nelmts; /* Number of elements in file dataset's dataspace */ hbool_t full_overwrite; /* Whether we are over-writing all the elements */ @@ -808,98 +821,35 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, const H5S_t *file_ io_info->io_ops.single_write = H5D__mpio_select_write; } /* end if */ else { - int comm_size = 0; - - /* Retrieve size of MPI communicator used for file */ - if ((comm_size = H5F_shared_mpi_get_size(io_info->f_sh)) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get MPI communicator size") - /* Check if there are any filters in the pipeline. If there are, * we cannot break to independent I/O if this is a write operation * with multiple ranks involved; otherwise, there will be metadata * inconsistencies in the file. */ - if (comm_size > 1 && io_info->op_type == H5D_IO_OP_WRITE && - io_info->dset->shared->dcpl_cache.pline.nused > 0) { - H5D_mpio_no_collective_cause_t cause; - uint32_t local_no_collective_cause; - uint32_t global_no_collective_cause; - hbool_t local_error_message_previously_written = FALSE; - hbool_t global_error_message_previously_written = FALSE; - size_t idx; - size_t cause_strings_len; - char local_no_collective_cause_string[512] = ""; - char global_no_collective_cause_string[512] = ""; - const char * cause_strings[] = { - "independent I/O was requested", - "datatype conversions were required", - "data transforms needed to be applied", - "optimized MPI types flag wasn't set", - "one of the dataspaces was neither simple nor scalar", - "dataset was not contiguous or chunked", - "parallel writes to filtered datasets are disabled", - "an error occurred while checking if collective I/O was possible"}; - - cause_strings_len = sizeof(cause_strings) / sizeof(cause_strings[0]); - - if (H5CX_get_mpio_local_no_coll_cause(&local_no_collective_cause) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, - "unable to get local no collective cause value") - if (H5CX_get_mpio_global_no_coll_cause(&global_no_collective_cause) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, - "unable to get global no collective cause value") - - /* Append each of the "reason for breaking collective I/O" error messages to the - * local and global no collective cause strings */ - for (cause = 1, idx = 0; - (cause < H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE) && (idx < cause_strings_len); - cause <<= 1, idx++) { - if (cause & local_no_collective_cause) { - size_t local_buffer_space = sizeof(local_no_collective_cause_string) - - HDstrlen(local_no_collective_cause_string) - 1; - - /* Check if there were any previous error messages included. If so, prepend a - * semicolon to separate the messages. - */ - if (local_buffer_space && local_error_message_previously_written) { - HDstrncat(local_no_collective_cause_string, "; ", local_buffer_space); - local_buffer_space -= MIN(local_buffer_space, 2); - } - - if (local_buffer_space) - HDstrncat(local_no_collective_cause_string, cause_strings[idx], - local_buffer_space); - - local_error_message_previously_written = TRUE; - } /* end if */ - - if (cause & global_no_collective_cause) { - size_t global_buffer_space = sizeof(global_no_collective_cause_string) - - HDstrlen(global_no_collective_cause_string) - 1; - - /* Check if there were any previous error messages included. If so, prepend a - * semicolon to separate the messages. - */ - if (global_buffer_space && global_error_message_previously_written) { - HDstrncat(global_no_collective_cause_string, "; ", global_buffer_space); - global_buffer_space -= MIN(global_buffer_space, 2); - } - - if (global_buffer_space) - HDstrncat(global_no_collective_cause_string, cause_strings[idx], - global_buffer_space); - - global_error_message_previously_written = TRUE; - } /* end if */ - } /* end for */ - - HGOTO_ERROR(H5E_IO, H5E_NO_INDEPENDENT, FAIL, - "Can't perform independent write with filters in pipeline.\n" - " The following caused a break from collective I/O:\n" - " Local causes: %s\n" - " Global causes: %s", - local_no_collective_cause_string, global_no_collective_cause_string); - } /* end if */ + if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->dcpl_cache.pline.nused > 0) { + int comm_size = 0; + + /* Retrieve size of MPI communicator used for file */ + if ((comm_size = H5F_shared_mpi_get_size(io_info->f_sh)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get MPI communicator size") + + if (comm_size > 1) { + char local_no_coll_cause_string[512]; + char global_no_coll_cause_string[512]; + + if (H5D__mpio_get_no_coll_cause_strings(local_no_coll_cause_string, 512, + global_no_coll_cause_string, 512) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't get reasons for breaking collective I/O") + + HGOTO_ERROR(H5E_IO, H5E_NO_INDEPENDENT, FAIL, + "Can't perform independent write with filters in pipeline.\n" + " The following caused a break from collective I/O:\n" + " Local causes: %s\n" + " Global causes: %s", + local_no_coll_cause_string, global_no_coll_cause_string); + } + } /* If we won't be doing collective I/O, but the user asked for * collective I/O, change the request to use independent I/O diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c index ce790c4..527fc7b 100644 --- a/src/H5Dmpio.c +++ b/src/H5Dmpio.c @@ -36,6 +36,7 @@ #include "H5Eprivate.h" /* Error handling */ #include "H5Fprivate.h" /* File access */ #include "H5FDprivate.h" /* File drivers */ +#include "H5FLprivate.h" /* Free Lists */ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5Oprivate.h" /* Object headers */ @@ -43,6 +44,15 @@ #include "H5Sprivate.h" /* Dataspaces */ #include "H5VMprivate.h" /* Vector */ +/* uthash is an external, header-only hash table implementation. + * + * We include the file directly in src/ and #define a few functions + * to use our internal memory calls. + */ +#define uthash_malloc(sz) H5MM_malloc(sz) +#define uthash_free(ptr, sz) H5MM_free(ptr) /* Ignoring sz is intentional */ +#include "uthash.h" + #ifdef H5_HAVE_PARALLEL /****************/ @@ -81,9 +91,54 @@ /* Macros to represent the regularity of the selection for multiple chunk IO case. */ #define H5D_CHUNK_SELECT_REG 1 +/* + * Threshold value for redistributing shared filtered chunks + * on all MPI ranks, or just MPI rank 0 + */ +#define H5D_CHUNK_REDISTRIBUTE_THRES ((size_t)((25 * H5_MB) / sizeof(H5D_chunk_redistribute_info_t))) + +/* + * Initial allocation size for the arrays that hold + * buffers for chunk modification data that is sent + * to other ranks and the MPI_Request objects for + * those send operations + */ +#define H5D_CHUNK_NUM_SEND_MSGS_INIT 64 + +/* + * Define a tag value for the MPI messages sent/received for + * chunk modification data + */ +#define H5D_CHUNK_MOD_DATA_TAG 64 + +/* + * Macro to initialize a H5D_chk_idx_info_t + * structure, given a pointer to a H5D_io_info_t + * structure + */ +#define H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info_ptr) \ + do { \ + index_info.f = (io_info_ptr)->dset->oloc.file; \ + index_info.pline = &((io_info_ptr)->dset->shared->dcpl_cache.pline); \ + index_info.layout = &((io_info_ptr)->dset->shared->layout.u.chunk); \ + index_info.storage = &((io_info_ptr)->dset->shared->layout.storage.u.chunk); \ + } while (0) + +/* + * Macro to initialize a H5D_chunk_ud_t structure + * given a pointer to a H5D_chk_idx_info_t structure + */ +#define H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, index_info_ptr) \ + do { \ + HDmemset(&chunk_ud, 0, sizeof(H5D_chunk_ud_t)); \ + chunk_ud.common.layout = (index_info_ptr)->layout; \ + chunk_ud.common.storage = (index_info_ptr)->storage; \ + } while (0) + /******************/ /* Local Typedefs */ /******************/ + /* Combine chunk address and chunk info into a struct for better performance. */ typedef struct H5D_chunk_addr_info_t { haddr_t chunk_addr; @@ -100,115 +155,137 @@ typedef enum H5D_mpio_no_rank0_bcast_cause_t { } H5D_mpio_no_rank0_bcast_cause_t; /* + * Information necessary for re-allocating file space for a chunk + * during a parallel write of a chunked dataset with filters + * applied. + */ +typedef struct H5D_chunk_alloc_info_t { + H5F_block_t chunk_current; + H5F_block_t chunk_new; + hsize_t chunk_idx; +} H5D_chunk_alloc_info_t; + +/* + * Information for a chunk pertaining to the dataset's chunk + * index entry for the chunk + */ +typedef struct H5D_chunk_index_info_t { + hsize_t chunk_idx; + unsigned filter_mask; + hbool_t need_insert; +} H5D_chunk_index_info_t; + +/* * Information about a single chunk when performing collective filtered I/O. All * of the fields of one of these structs are initialized at the start of collective - * filtered I/O in the function H5D__construct_filtered_io_info_list(). - * - * This struct's fields are as follows: - * - * index - The "Index" of the chunk in the dataset. The index of a chunk is used during - * the collective re-insertion of chunks into the chunk index after the collective - * I/O has been performed. - * - * scaled - The scaled coordinates of the chunk in the dataset's file dataspace. The - * coordinates are used in both the collective re-allocation of space in the file - * and the collective re-insertion of chunks into the chunk index after the collective - * I/O has been performed. + * filtered I/O in the function H5D__mpio_collective_filtered_chunk_io_setup(). This + * struct's fields are as follows: * - * full_overwrite - A flag which determines whether or not a chunk needs to be read from the - * file when being updated. If a chunk is being fully overwritten (the entire - * extent is selected in its file dataspace), then it is not necessary to - * read the chunk from the file. However, if the chunk is not being fully - * overwritten, it has to be read from the file in order to update the chunk - * without trashing the parts of the chunk that are not selected. + * index_info - A structure containing the information needed when collectively + * re-inserting the chunk into the dataset's chunk index. The structure + * is distributed to all ranks during the re-insertion operation. Its fields + * are as follows: * - * num_writers - The total number of processors writing to this chunk. This field is used - * when the new owner of a chunk is receiving messages, which contain selections in - * the chunk and data to update the chunk with, from other processors which have this - * chunk selected in the I/O operation. The new owner must know how many processors it - * should expect messages from so that it can post an equal number of receive calls. + * chunk_idx - The index of the chunk in the dataset's chunk index. * - * io_size - The total size of I/O to this chunk. This field is an accumulation of the size of - * I/O to the chunk from each processor which has the chunk selected and is used to - * determine the value for the previous full_overwrite flag. + * filter_mask - A bit-mask that indicates which filters are to be applied to the + * chunk. Each filter in a chunk's filter pipeline has a bit position + * that can be masked to disable that particular filter for the chunk. + * This filter mask is saved alongside the chunk in the file. * - * buf - A pointer which serves the dual purpose of holding either the chunk data which is to be - * written to the file or the chunk data which has been read from the file. + * need_insert - A flag which determines whether or not a chunk needs to be re-inserted into + * the chunk index after the write operation. * - * chunk_states - In the case of dataset writes only, this struct is used to track a chunk's size and - * address in the file before and after the filtering operation has occurred. + * chunk_info - A pointer to the chunk's H5D_chunk_info_t structure, which contains useful + * information like the dataspaces containing the selection in the chunk. * - * Its fields are as follows: + * chunk_current - The address in the file and size of this chunk before the filtering + * operation. When reading a chunk from the file, this field is used to + * read the correct amount of bytes. It is also used when redistributing + * shared chunks among MPI ranks and as a parameter to the chunk file + * space reallocation function. * - * chunk_current - The address in the file and size of this chunk before the filtering - * operation. When reading a chunk from the file, this field is used to - * read the correct amount of bytes. It is also used when redistributing - * shared chunks among processors and as a parameter to the chunk file - * space reallocation function. + * chunk_new - The address in the file and size of this chunk after the filtering + * operation. This field is relevant when collectively re-allocating space + * in the file for all of the chunks written to in the I/O operation, as + * their sizes may have changed after their data has been filtered. * - * new_chunk - The address in the file and size of this chunk after the filtering - * operation. This field is relevant when collectively re-allocating space - * in the file for all of the chunks written to in the I/O operation, as - * their sizes may have changed after their data has been filtered. + * need_read - A flag which determines whether or not a chunk needs to be read from the + * file. During writes, if a chunk is being fully overwritten (the entire extent + * is selected in its file dataspace), then it is not necessary to read the chunk + * from the file. However, if the chunk is not being fully overwritten, it has to + * be read from the file in order to update the chunk without trashing the parts + * of the chunk that are not selected. During reads, this field should generally + * be true, but may be false if the chunk isn't allocated, for example. * - * owners - In the case of dataset writes only, this struct is used to manage which single processor - * will ultimately write data out to the chunk. It allows the other processors to act according - * to the decision and send their selection in the chunk, as well as the data they wish - * to update the chunk with, to the processor which is writing to the chunk. + * skip_filter_pline - A flag which determines whether to skip calls to the filter pipeline + * for this chunk. This flag is mostly useful for correct handling of + * partial edge chunks when the "don't filter partial edge chunks" flag + * is set on the dataset's DCPL. * - * Its fields are as follows: + * io_size - The total size of I/O to this chunk. This field is an accumulation of the size of + * I/O to the chunk from each MPI rank which has the chunk selected and is used to + * determine the value for the previous `full_overwrite` flag. * - * original_owner - The processor which originally had this chunk selected at the beginning of - * the collective filtered I/O operation. This field is currently used when - * redistributing shared chunks among processors. + * chunk_buf_size - The size in bytes of the data buffer allocated for the chunk * - * new_owner - The processor which has been selected to perform the write to this chunk. + * orig_owner - The MPI rank which originally had this chunk selected at the beginning of + * the collective filtered I/O operation. This field is currently used when + * redistributing shared chunks among MPI ranks. * - * async_info - In the case of dataset writes only, this struct is used by the owning processor of the - * chunk in order to manage the MPI send and receive calls made between it and all of - * the other processors which have this chunk selected in the I/O operation. + * new_owner - The MPI rank which has been selected to perform the modifications to this chunk. * - * Its fields are as follows: + * num_writers - The total number of MPI ranks writing to this chunk. This field is used when + * the new owner of a chunk is receiving messages from other MPI ranks that + * contain their selections in the chunk and the data to update the chunk with. + * The new owner must know how many MPI ranks it should expect messages from so + * that it can post an equal number of receive calls. * - * receive_requests_array - An array containing one MPI_Request for each of the - * asynchronous MPI receive calls the owning processor of this - * chunk makes to another processor in order to receive that - * processor's chunk modification data and selection in the chunk. + * buf - A pointer which serves the dual purpose of holding either the chunk data which is to be + * written to the file or the chunk data which has been read from the file. * - * receive_buffer_array - An array of buffers into which the owning processor of this chunk - * will store chunk modification data and the selection in the chunk - * received from another processor. + * hh - A handle for hash tables provided by the uthash.h header * - * num_receive_requests - The number of entries in the receive_request_array and - * receive_buffer_array fields. */ typedef struct H5D_filtered_collective_io_info_t { - hsize_t index; - hsize_t scaled[H5O_LAYOUT_NDIMS]; - hbool_t full_overwrite; - size_t num_writers; - size_t io_size; - void * buf; - - struct { - H5F_block_t chunk_current; - H5F_block_t new_chunk; - } chunk_states; - - struct { - int original_owner; - int new_owner; - } owners; - - struct { - MPI_Request * receive_requests_array; - unsigned char **receive_buffer_array; - int num_receive_requests; - } async_info; + H5D_chunk_index_info_t index_info; + + H5D_chunk_info_t *chunk_info; + H5F_block_t chunk_current; + H5F_block_t chunk_new; + hbool_t need_read; + hbool_t skip_filter_pline; + size_t io_size; + size_t chunk_buf_size; + int orig_owner; + int new_owner; + int num_writers; + void * buf; + + UT_hash_handle hh; } H5D_filtered_collective_io_info_t; -/* Function pointer typedef for sort function */ -typedef int (*H5D_mpio_sort_func_cb_t)(const void *, const void *); +/* + * Information necessary for redistributing shared chunks during + * a parallel write of a chunked dataset with filters applied. + */ +typedef struct H5D_chunk_redistribute_info_t { + H5F_block_t chunk_block; + hsize_t chunk_idx; + int orig_owner; + int new_owner; + int num_writers; +} H5D_chunk_redistribute_info_t; + +/* + * Information used when re-inserting a chunk into a dataset's + * chunk index during a parallel write of a chunked dataset with + * filters applied. + */ +typedef struct H5D_chunk_insert_info_t { + H5F_block_t chunk_block; + H5D_chunk_index_info_t index_info; +} H5D_chunk_insert_info_t; /********************/ /* Local Prototypes */ @@ -216,53 +293,98 @@ typedef int (*H5D_mpio_sort_func_cb_t)(const void *, const void *); static herr_t H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm); static herr_t H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - H5D_chunk_map_t *fm); + H5D_chunk_map_t *fm, int mpi_rank, int mpi_size); static herr_t H5D__multi_chunk_filtered_collective_io(H5D_io_info_t * io_info, - const H5D_type_info_t *type_info, H5D_chunk_map_t *fm); + const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, + int mpi_rank, int mpi_size); static herr_t H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - H5D_chunk_map_t *fm, int sum_chunk); + H5D_chunk_map_t *fm, int sum_chunk, int mpi_rank, int mpi_size); static herr_t H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - H5D_chunk_map_t *fm); + H5D_chunk_map_t *fm, int mpi_rank, int mpi_size); static herr_t H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, const H5S_t *file_space, const H5S_t *mem_space); static herr_t H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, MPI_Datatype mpi_file_type, MPI_Datatype mpi_buf_type); static herr_t H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, - H5D_chunk_addr_info_t chunk_addr_info_array[], int many_chunk_opt); + H5D_chunk_addr_info_t chunk_addr_info_array[], int many_chunk_opt, int mpi_rank, + int mpi_size); static herr_t H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[], - haddr_t chunk_addr[]); + haddr_t chunk_addr[], int mpi_rank, int mpi_size); static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, int *sum_chunkf); -static herr_t H5D__construct_filtered_io_info_list(const H5D_io_info_t * io_info, - const H5D_type_info_t * type_info, - const H5D_chunk_map_t * fm, - H5D_filtered_collective_io_info_t **chunk_list, - size_t * num_entries); -#if MPI_VERSION >= 3 -static herr_t H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t * io_info, - const H5D_type_info_t * type_info, - const H5D_chunk_map_t * fm, - H5D_filtered_collective_io_info_t *local_chunk_array, - size_t *local_chunk_array_num_entries); -#endif -static herr_t H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries, - size_t array_entry_size, void **gathered_array, - size_t *gathered_array_num_entries, hbool_t allgather, int root, - MPI_Comm comm, int (*sort_func)(const void *, const void *)); -static herr_t H5D__mpio_filtered_collective_write_type(H5D_filtered_collective_io_info_t *chunk_list, - size_t num_entries, MPI_Datatype *new_mem_type, - hbool_t *mem_type_derived, MPI_Datatype *new_file_type, - hbool_t *file_type_derived); -static herr_t H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk_entry, - const H5D_io_info_t * io_info, - const H5D_type_info_t * type_info, - const H5D_chunk_map_t * fm); +static herr_t H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t * io_info, + const H5D_type_info_t * type_info, + const H5D_chunk_map_t * fm, + H5D_filtered_collective_io_info_t **chunk_list, + size_t *num_entries, int mpi_rank); +static herr_t H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, + int mpi_rank, int mpi_size, + size_t **rank_chunks_assigned_map); +static herr_t H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list, + size_t * num_chunks_assigned_map, + hbool_t all_ranks_involved, + const H5D_io_info_t * io_info, + const H5D_chunk_map_t *fm, int mpi_rank, int mpi_size); +static herr_t H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list, + size_t *chunk_list_num_entries, H5D_io_info_t *io_info, + const H5D_type_info_t *type_info, int mpi_rank, + int mpi_size, + H5D_filtered_collective_io_info_t **chunk_hash_table, + unsigned char *** chunk_msg_bufs, + int * chunk_msg_bufs_len); +static herr_t H5D__mpio_collective_filtered_chunk_common_io(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + const H5D_io_info_t * io_info, + const H5D_type_info_t *type_info, int mpi_size); +static herr_t H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + const H5D_io_info_t * io_info, + const H5D_type_info_t *type_info, int mpi_rank, + int mpi_size); +static herr_t H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + H5D_filtered_collective_io_info_t *chunk_hash_table, + unsigned char ** chunk_msg_bufs, + int chunk_msg_bufs_len, const H5D_io_info_t *io_info, + const H5D_type_info_t *type_info, int mpi_rank, + int mpi_size); +static herr_t H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + size_t * num_chunks_assigned_map, + H5D_io_info_t * io_info, + H5D_chk_idx_info_t *idx_info, int mpi_rank, + int mpi_size); +static herr_t H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + size_t * num_chunks_assigned_map, + H5D_io_info_t * io_info, + H5D_chk_idx_info_t *idx_info, int mpi_rank, + int mpi_size); +static herr_t H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, + hbool_t * contig_type_derived, + MPI_Datatype *resized_type, + hbool_t * resized_type_derived); +static herr_t H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived, + MPI_Datatype *resized_type, hbool_t *resized_type_derived); +static herr_t H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived, + MPI_Datatype *resized_type, + hbool_t * resized_type_derived); +static herr_t H5D__mpio_collective_filtered_io_type(H5D_filtered_collective_io_info_t *chunk_list, + size_t num_entries, H5D_io_op_type_t op_type, + MPI_Datatype *new_mem_type, hbool_t *mem_type_derived, + MPI_Datatype *new_file_type, hbool_t *file_type_derived); static int H5D__cmp_chunk_addr(const void *chunk_addr_info1, const void *chunk_addr_info2); static int H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_info_entry1, const void *filtered_collective_io_info_entry2); -#if MPI_VERSION >= 3 -static int H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective_io_info_entry1, - const void *filtered_collective_io_info_entry2); +static int H5D__cmp_chunk_redistribute_info(const void *entry1, const void *entry2); +static int H5D__cmp_chunk_redistribute_info_orig_owner(const void *entry1, const void *entry2); + +#ifdef H5Dmpio_DEBUG +static herr_t H5D__mpio_debug_init(void); +static herr_t H5D__mpio_dump_collective_filtered_chunk_list(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, int mpi_rank); #endif /*********************/ @@ -273,6 +395,188 @@ static int H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered /* Local Variables */ /*******************/ +/* Declare extern free list to manage the H5S_sel_iter_t struct */ +H5FL_EXTERN(H5S_sel_iter_t); + +#ifdef H5Dmpio_DEBUG + +/* Flags to control debug actions in this file. + * (Meant to be indexed by characters) + * + * These flags can be set with either (or both) the environment variable + * "H5D_mpio_Debug" set to a string containing one or more characters + * (flags) or by setting them as a string value for the + * "H5D_mpio_debug_key" MPI Info key. + * + * Supported characters in 'H5D_mpio_Debug' string: + * 't' trace function entry and exit + * 'f' log to file rather than debugging stream + * 'm' show (rough) memory usage statistics + * 'c' show critical timing information + * + * To only show output from a particular MPI rank, specify its rank + * number as a character, e.g.: + * + * '0' only show output from rank 0 + * + * To only show output from a particular range (up to 8 ranks supported + * between 0-9) of MPI ranks, specify the start and end ranks separated + * by a hyphen, e.g.: + * + * '0-7' only show output from ranks 0 through 7 + * + */ +static int H5D_mpio_debug_flags_s[256]; +static int H5D_mpio_debug_rank_s[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; +static hbool_t H5D_mpio_debug_inited = FALSE; +static const char *const trace_in_pre = "-> "; +static const char *const trace_out_pre = "<- "; +static int debug_indent = 0; +static FILE * debug_stream = NULL; + +/* Determine if this rank should output debugging info */ +#define H5D_MPIO_DEBUG_THIS_RANK(rank) \ + (H5D_mpio_debug_rank_s[0] < 0 || rank == H5D_mpio_debug_rank_s[0] || rank == H5D_mpio_debug_rank_s[1] || \ + rank == H5D_mpio_debug_rank_s[2] || rank == H5D_mpio_debug_rank_s[3] || \ + rank == H5D_mpio_debug_rank_s[4] || rank == H5D_mpio_debug_rank_s[5] || \ + rank == H5D_mpio_debug_rank_s[6] || rank == H5D_mpio_debug_rank_s[7]) + +/* Print some debugging string */ +#define H5D_MPIO_DEBUG(rank, string) \ + do { \ + if (debug_stream && H5D_MPIO_DEBUG_THIS_RANK(rank)) { \ + HDfprintf(debug_stream, "%*s(Rank %d) " string "\n", debug_indent, "", rank); \ + fflush(debug_stream); \ + } \ + } while (0) + +/* Print some debugging string with printf-style arguments */ +#define H5D_MPIO_DEBUG_VA(rank, string, ...) \ + do { \ + if (debug_stream && H5D_MPIO_DEBUG_THIS_RANK(rank)) { \ + HDfprintf(debug_stream, "%*s(Rank %d) " string "\n", debug_indent, "", rank, __VA_ARGS__); \ + fflush(debug_stream); \ + } \ + } while (0) + +#define H5D_MPIO_TRACE_ENTER(rank) \ + do { \ + hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \ + \ + if (trace_flag) { \ + H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_in_pre, __func__); \ + debug_indent += (int)strlen(trace_in_pre); \ + } \ + } while (0) + +#define H5D_MPIO_TRACE_EXIT(rank) \ + do { \ + hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t']; \ + \ + if (trace_flag) { \ + debug_indent -= (int)strlen(trace_out_pre); \ + H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_out_pre, __func__); \ + } \ + } while (0) + +#define H5D_MPIO_TIME_START(rank, op_name) \ + { \ + hbool_t time_flag = H5D_mpio_debug_flags_s[(int)'c']; \ + double start_time = 0.0, end_time = 0.0; \ + const char *const op = op_name; \ + \ + if (time_flag) { \ + start_time = MPI_Wtime(); \ + } + +#define H5D_MPIO_TIME_STOP(rank) \ + if (time_flag) { \ + end_time = MPI_Wtime(); \ + H5D_MPIO_DEBUG_VA(rank, "'%s' took %f seconds", op, (end_time - start_time)); \ + } \ + } + +/*--------------------------------------------------------------------------- + * Function: H5D__mpio_parse_debug_str + * + * Purpose: Parse a string for H5Dmpio-related debugging flags + * + * Returns: N/A + * + *--------------------------------------------------------------------------- + */ +static void +H5D__mpio_parse_debug_str(const char *s) +{ + FUNC_ENTER_STATIC_NOERR + + HDassert(s); + + while (*s) { + int c = (int)(*s); + + if (c >= (int)'0' && c <= (int)'9') { + hbool_t range = FALSE; + + if (*(s + 1) && *(s + 2)) + range = (int)*(s + 1) == '-' && (int)*(s + 2) >= (int)'0' && (int)*(s + 2) <= (int)'9'; + + if (range) { + int start_rank = c - (int)'0'; + int end_rank = (int)*(s + 2) - '0'; + int num_ranks = end_rank - start_rank + 1; + int i; + + if (num_ranks > 8) { + end_rank = start_rank + 7; + num_ranks = 8; + } + + for (i = 0; i < num_ranks; i++) + H5D_mpio_debug_rank_s[i] = start_rank++; + + s += 3; + } + else + H5D_mpio_debug_rank_s[0] = c - (int)'0'; + } + else + H5D_mpio_debug_flags_s[c]++; + + s++; + } + + FUNC_LEAVE_NOAPI_VOID +} + +static herr_t +H5D__mpio_debug_init(void) +{ + const char *debug_str; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC_NOERR + + HDassert(!H5D_mpio_debug_inited); + + /* Clear the debug flag buffer */ + HDmemset(H5D_mpio_debug_flags_s, 0, sizeof(H5D_mpio_debug_flags_s)); + + /* Retrieve and parse the H5Dmpio debug string */ + debug_str = HDgetenv("H5D_mpio_Debug"); + if (debug_str) + H5D__mpio_parse_debug_str(debug_str); + + if (H5DEBUG(D)) + debug_stream = H5DEBUG(D); + + H5D_mpio_debug_inited = TRUE; + + FUNC_LEAVE_NOAPI(ret_value) +} + +#endif + /*------------------------------------------------------------------------- * Function: H5D__mpio_opt_possible * @@ -347,14 +651,9 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, co * use collective IO will defer until each chunk IO is reached. */ -#if MPI_VERSION < 3 - /* - * Don't allow parallel writes to filtered datasets if the MPI version - * is less than 3. The functions needed (MPI_Mprobe and MPI_Imrecv) will - * not be available. - */ - if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->layout.type == H5D_CHUNKED && - io_info->dset->shared->dcpl_cache.pline.nused > 0) +#ifndef H5_HAVE_PARALLEL_FILTERED_WRITES + /* Don't allow writes to filtered datasets if the functionality is disabled */ + if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->dcpl_cache.pline.nused > 0) local_cause[0] |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED; #endif @@ -437,6 +736,150 @@ done: } /* H5D__mpio_opt_possible() */ /*------------------------------------------------------------------------- + * Function: H5D__mpio_get_no_coll_cause_strings + * + * Purpose: When collective I/O is broken internally, it can be useful + * for users to see a representative string for the reason(s) + * why it was broken. This routine inspects the current + * "cause" flags from the API context and prints strings into + * the caller's buffers for the local and global reasons that + * collective I/O was broken. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len, char *global_cause, + size_t global_cause_len) +{ + uint32_t local_no_coll_cause; + uint32_t global_no_coll_cause; + size_t local_cause_bytes_written = 0; + size_t global_cause_bytes_written = 0; + int nbits; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_PACKAGE + + HDassert((local_cause && local_cause_len > 0) || (global_cause && global_cause_len > 0)); + + /* + * Use compile-time assertion so this routine is updated + * when any new "no collective cause" values are added + */ + HDcompile_assert(H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE == (H5D_mpio_no_collective_cause_t)256); + + /* Initialize output buffers */ + if (local_cause) + *local_cause = '\0'; + if (global_cause) + *global_cause = '\0'; + + /* Retrieve the local and global cause flags from the API context */ + if (H5CX_get_mpio_local_no_coll_cause(&local_no_coll_cause) < 0) + HGOTO_ERROR(H5E_CONTEXT, H5E_CANTGET, FAIL, "unable to get local no collective cause value") + if (H5CX_get_mpio_global_no_coll_cause(&global_no_coll_cause) < 0) + HGOTO_ERROR(H5E_CONTEXT, H5E_CANTGET, FAIL, "unable to get global no collective cause value") + + /* + * Append each of the "reason for breaking collective I/O" + * error messages to the local and global cause string buffers + */ + nbits = 8 * sizeof(local_no_coll_cause); + for (int bit_pos = 0; bit_pos < nbits; bit_pos++) { + H5D_mpio_no_collective_cause_t cur_cause; + const char * cause_str; + size_t buf_space_left; + + cur_cause = (H5D_mpio_no_collective_cause_t)(1 << bit_pos); + if (cur_cause == H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE) + break; + + switch (cur_cause) { + case H5D_MPIO_SET_INDEPENDENT: + cause_str = "independent I/O was requested"; + break; + case H5D_MPIO_DATATYPE_CONVERSION: + cause_str = "datatype conversions were required"; + break; + case H5D_MPIO_DATA_TRANSFORMS: + cause_str = "data transforms needed to be applied"; + break; + case H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED: + cause_str = "optimized MPI types flag wasn't set"; + break; + case H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES: + cause_str = "one of the dataspaces was neither simple nor scalar"; + break; + case H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET: + cause_str = "dataset was not contiguous or chunked"; + break; + case H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED: + cause_str = "parallel writes to filtered datasets are disabled"; + break; + case H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE: + cause_str = "an error occurred while checking if collective I/O was possible"; + break; + case H5D_MPIO_COLLECTIVE: + case H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE: + default: + HDassert(0 && "invalid no collective cause reason"); + break; + } + + /* + * Determine if the local reasons for breaking collective I/O + * included the current cause + */ + if (local_cause && (cur_cause & local_no_coll_cause)) { + buf_space_left = local_cause_len - local_cause_bytes_written; + + /* + * Check if there were any previous error messages included. If + * so, prepend a semicolon to separate the messages. + */ + if (buf_space_left && local_cause_bytes_written) { + HDstrncat(local_cause, "; ", buf_space_left); + local_cause_bytes_written += MIN(buf_space_left, 2); + buf_space_left -= MIN(buf_space_left, 2); + } + + if (buf_space_left) { + HDstrncat(local_cause, cause_str, buf_space_left); + local_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str)); + } + } + + /* + * Determine if the global reasons for breaking collective I/O + * included the current cause + */ + if (global_cause && (cur_cause & global_no_coll_cause)) { + buf_space_left = global_cause_len - global_cause_bytes_written; + + /* + * Check if there were any previous error messages included. If + * so, prepend a semicolon to separate the messages. + */ + if (buf_space_left && global_cause_bytes_written) { + HDstrncat(global_cause, "; ", buf_space_left); + global_cause_bytes_written += MIN(buf_space_left, 2); + buf_space_left -= MIN(buf_space_left, 2); + } + + if (buf_space_left) { + HDstrncat(global_cause, cause_str, buf_space_left); + global_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str)); + } + } + } + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_get_no_coll_cause_strings() */ + +/*------------------------------------------------------------------------- * Function: H5D__mpio_select_read * * Purpose: MPI-IO function to read directly from app buffer to file. @@ -500,145 +943,6 @@ done: } /* end H5D__mpio_select_write() */ /*------------------------------------------------------------------------- - * Function: H5D__mpio_array_gatherv - * - * Purpose: Given an array, specified in local_array, by each processor - * calling this function, collects each array into a single - * array which is then either gathered to the processor - * specified by root, when allgather is false, or is - * distributed back to all processors when allgather is true. - * - * The number of entries in the array contributed by an - * individual processor and the size of each entry should be - * specified in local_array_num_entries and array_entry_size, - * respectively. - * - * The MPI communicator to use should be specified for comm. - * - * If the sort_func argument is supplied, the array is sorted - * before the function returns. - * - * Note: if allgather is specified as true, root is ignored. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: Jordan Henderson - * Sunday, April 9th, 2017 - * - *------------------------------------------------------------------------- - */ -static herr_t -H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries, size_t array_entry_size, - void **_gathered_array, size_t *_gathered_array_num_entries, hbool_t allgather, - int root, MPI_Comm comm, H5D_mpio_sort_func_cb_t sort_func) -{ - size_t gathered_array_num_entries = 0; /* The size of the newly-constructed array */ - void * gathered_array = NULL; /* The newly-constructed array returned to the caller */ - int *receive_counts_array = NULL; /* Array containing number of entries each processor is contributing */ - int *displacements_array = - NULL; /* Array of displacements where each processor places its data in the final array */ - int mpi_code, mpi_rank, mpi_size; - int sendcount; - herr_t ret_value = SUCCEED; - - FUNC_ENTER_STATIC - - HDassert(_gathered_array); - HDassert(_gathered_array_num_entries); - - MPI_Comm_size(comm, &mpi_size); - MPI_Comm_rank(comm, &mpi_rank); - - /* Determine the size of the end result array by collecting the number - * of entries contributed by each processor into a single total. - */ - if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_array_num_entries, &gathered_array_num_entries, 1, - MPI_INT, MPI_SUM, comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - - /* If 0 entries resulted from the collective operation, no processor is contributing anything and there is - * nothing to do */ - if (gathered_array_num_entries > 0) { - /* - * If gathering to all processors, all processors need to allocate space for the resulting array, as - * well as the receive counts and displacements arrays for the collective MPI_Allgatherv call. - * Otherwise, only the root processor needs to allocate the space for an MPI_Gatherv call. - */ - if (allgather || (mpi_rank == root)) { - if (NULL == (gathered_array = H5MM_malloc(gathered_array_num_entries * array_entry_size))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate gathered array") - - if (NULL == (receive_counts_array = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive counts array") - - if (NULL == (displacements_array = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive displacements array") - } /* end if */ - - /* - * If gathering to all processors, inform each processor of how many entries each other processor is - * contributing to the resulting array by collecting the counts into each processor's "receive counts" - * array. Otherwise, inform only the root processor of how many entries each other processor is - * contributing. - */ - if (allgather) { - if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&local_array_num_entries, 1, MPI_INT, - receive_counts_array, 1, MPI_INT, comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code) - } /* end if */ - else { - if (MPI_SUCCESS != (mpi_code = MPI_Gather(&local_array_num_entries, 1, MPI_INT, - receive_counts_array, 1, MPI_INT, root, comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Gather failed", mpi_code) - } /* end else */ - - if (allgather || (mpi_rank == root)) { - size_t i; - - /* Multiply each receive count by the size of the array entry, since the data is sent as bytes. */ - for (i = 0; i < (size_t)mpi_size; i++) - H5_CHECKED_ASSIGN(receive_counts_array[i], int, - (size_t)receive_counts_array[i] * array_entry_size, size_t); - - /* Set receive buffer offsets for the collective MPI_Allgatherv/MPI_Gatherv call. */ - displacements_array[0] = 0; - for (i = 1; i < (size_t)mpi_size; i++) - displacements_array[i] = displacements_array[i - 1] + receive_counts_array[i - 1]; - } /* end if */ - - /* As the data is sent as bytes, calculate the true sendcount for the data. */ - H5_CHECKED_ASSIGN(sendcount, int, local_array_num_entries *array_entry_size, size_t); - - if (allgather) { - if (MPI_SUCCESS != - (mpi_code = MPI_Allgatherv(local_array, sendcount, MPI_BYTE, gathered_array, - receive_counts_array, displacements_array, MPI_BYTE, comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allgatherv failed", mpi_code) - } /* end if */ - else { - if (MPI_SUCCESS != - (mpi_code = MPI_Gatherv(local_array, sendcount, MPI_BYTE, gathered_array, - receive_counts_array, displacements_array, MPI_BYTE, root, comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Gatherv failed", mpi_code) - } /* end else */ - - if (sort_func && (allgather || (mpi_rank == root))) - HDqsort(gathered_array, gathered_array_num_entries, array_entry_size, sort_func); - } /* end if */ - - *_gathered_array = gathered_array; - *_gathered_array_num_entries = gathered_array_num_entries; - -done: - if (receive_counts_array) - H5MM_free(receive_counts_array); - if (displacements_array) - H5MM_free(displacements_array); - - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__mpio_array_gatherv() */ - -/*------------------------------------------------------------------------- * Function: H5D__mpio_get_sum_chunk * * Purpose: Routine for obtaining total number of chunks to cover @@ -793,11 +1097,17 @@ static herr_t H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm) { H5FD_mpio_chunk_opt_t chunk_opt_mode; - int io_option = H5D_MULTI_CHUNK_IO_MORE_OPT; - int sum_chunk = -1; +#ifdef H5Dmpio_DEBUG + hbool_t log_file_flag = FALSE; + FILE * debug_log_file = NULL; +#endif #ifdef H5_HAVE_INSTRUMENTED_LIBRARY htri_t temp_not_link_io = FALSE; #endif + int io_option = H5D_MULTI_CHUNK_IO_MORE_OPT; + int sum_chunk = -1; + int mpi_rank; + int mpi_size; herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -808,6 +1118,36 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf HDassert(type_info); HDassert(fm); + /* Obtain the current rank of the process and the number of ranks */ + if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) + HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank") + if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) + HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI size") + +#ifdef H5Dmpio_DEBUG + /* Initialize file-level debugging if not initialized */ + if (!H5D_mpio_debug_inited && H5D__mpio_debug_init() < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize H5Dmpio debugging") + + /* Open file for debugging if necessary */ + log_file_flag = H5D_mpio_debug_flags_s[(int)'f']; + if (log_file_flag) { + char debug_log_filename[1024]; + time_t time_now; + + HDsnprintf(debug_log_filename, 1024, "H5Dmpio_debug.rank%d", mpi_rank); + + if (NULL == (debug_log_file = HDfopen(debug_log_filename, "a"))) + HGOTO_ERROR(H5E_IO, H5E_OPENERROR, FAIL, "couldn't open debugging log file") + + /* Print a short header for this I/O operation */ + time_now = time(NULL); + HDfprintf(debug_log_file, "##### %s", asctime(localtime(&time_now))); + + debug_stream = debug_log_file; + } +#endif + /* Check the optional property list for the collective chunk IO optimization option */ if (H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option") @@ -820,13 +1160,10 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf /* via default path. branch by num threshold */ else { unsigned one_link_chunk_io_threshold; /* Threshold to use single collective I/O for all chunks */ - int mpi_size; /* Number of processes in MPI job */ if (H5D__mpio_get_sum_chunk(io_info, fm, &sum_chunk) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSWAP, FAIL, "unable to obtain the total chunk number of all processes"); - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") /* Get the chunk optimization option threshold */ if (H5CX_get_mpio_chunk_opt_num(&one_link_chunk_io_threshold) < 0) @@ -872,22 +1209,12 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf case H5D_ONE_LINK_CHUNK_IO_MORE_OPT: /* Check if there are any filters in the pipeline */ if (io_info->dset->shared->dcpl_cache.pline.nused > 0) { - /* For now, Multi-chunk IO must be forced for parallel filtered read, - * so that data can be unfiltered as it is received. There is significant - * complexity in unfiltering the data when it is read all at once into a - * single buffer. - */ - if (io_info->op_type == H5D_IO_OP_READ) { - if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm) < 0) - HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, - "couldn't finish optimized multiple filtered chunk MPI-IO") - } /* end if */ - else if (H5D__link_chunk_filtered_collective_io(io_info, type_info, fm) < 0) + if (H5D__link_chunk_filtered_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish filtered linked chunk MPI-IO") } /* end if */ else /* Perform unfiltered link chunk collective IO */ - if (H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk) < 0) + if (H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish linked chunk MPI-IO") break; @@ -895,18 +1222,28 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf default: /* multiple chunk IO via threshold */ /* Check if there are any filters in the pipeline */ if (io_info->dset->shared->dcpl_cache.pline.nused > 0) { - if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm) < 0) + if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish optimized multiple filtered chunk MPI-IO") } /* end if */ else /* Perform unfiltered multi chunk collective IO */ - if (H5D__multi_chunk_collective_io(io_info, type_info, fm) < 0) + if (H5D__multi_chunk_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish optimized multiple chunk MPI-IO") break; } /* end switch */ done: +#ifdef H5Dmpio_DEBUG + /* Close debugging log file */ + if (debug_log_file) { + HDfprintf(debug_log_file, "##############\n\n"); + if (EOF == HDfclose(debug_log_file)) + HDONE_ERROR(H5E_IO, H5E_CLOSEERROR, FAIL, "couldn't close debugging log file") + debug_stream = H5DEBUG(D); + } +#endif + FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__chunk_collective_io */ @@ -989,7 +1326,7 @@ done: */ static herr_t H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, - int sum_chunk) + int sum_chunk, int mpi_rank, int mpi_size) { H5D_chunk_addr_info_t *chunk_addr_info_array = NULL; MPI_Datatype chunk_final_mtype; /* Final memory MPI datatype for all chunks with selection */ @@ -1070,9 +1407,8 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ /* Set up the base storage address for this chunk */ io_info->store = &ctg_store; -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before inter_collective_io for total chunk = 1 \n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "before inter_collective_io for total chunk = 1"); #endif /* Perform I/O */ @@ -1088,9 +1424,8 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ num_chunk = H5SL_count(fm->sel_chunks); H5_CHECK_OVERFLOW(num_chunk, size_t, int); -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "total_chunks = %zu, num_chunk = %zu\n", total_chunks, num_chunk); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "total_chunks = %zu, num_chunk = %zu", total_chunks, num_chunk); #endif /* Set up MPI datatype for chunks selected */ @@ -1121,18 +1456,17 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file is derived datatype flags buffer") -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before sorting the chunk address \n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "before sorting chunk addresses"); #endif + /* Sort the chunk address */ - if (H5D__sort_chunk(io_info, fm, chunk_addr_info_array, sum_chunk) < 0) + if (H5D__sort_chunk(io_info, fm, chunk_addr_info_array, sum_chunk, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSWAP, FAIL, "unable to sort chunk address") ctg_store.contig.dset_addr = chunk_addr_info_array[0].chunk_addr; -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "after sorting the chunk address \n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "after sorting chunk addresses"); #endif /* Obtain MPI derived datatype from all individual chunks */ @@ -1237,9 +1571,9 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ /* No chunks selected for this process */ mpi_buf_count = (hsize_t)0; } /* end else */ -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before coming to final collective IO\n"); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "before coming to final collective I/O"); #endif /* Set up the base storage address for this chunk */ @@ -1252,11 +1586,11 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ } /* end else */ done: -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before freeing memory inside H5D_link_collective_io ret_value = %d\n", - ret_value); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "before freeing memory inside H5D_link_collective_io ret_value = %d", + ret_value); #endif + /* Release resources */ if (chunk_addr_info_array) H5MM_xfree(chunk_addr_info_array); @@ -1289,68 +1623,89 @@ done: /*------------------------------------------------------------------------- * Function: H5D__link_chunk_filtered_collective_io * - * Purpose: Routine for one collective IO with one MPI derived datatype - * to link with all filtered chunks - * - * 1. Construct a list of selected chunks in the collective IO - * operation - * A. If any chunk is being written to by more than 1 - * process, the process writing to the chunk which - * currently has the least amount of chunks assigned - * to it becomes the new owner (in the case of ties, - * the lowest MPI rank becomes the new owner) - * 2. If the operation is a write operation - * A. Loop through each chunk in the operation - * I. If this is not a full overwrite of the chunk - * a) Read the chunk from file and pass the chunk - * through the filter pipeline in reverse order - * (Unfilter the chunk) + * Purpose: Performs collective I/O on filtered chunks by creating a + * single MPI derived datatype to link with all filtered + * chunks. The general algorithm is as follows: + * + * 1. Construct a list of selected chunks in the collective + * I/O operation + * 2. If the operation is a read operation + * A. Ensure that the list of chunks is sorted in + * monotonically non-decreasing order of chunk offset + * in the file + * B. Participate in a collective read of chunks from + * the file + * C. Loop through each selected chunk, unfiltering it and + * scattering the data to the application's read buffer + * 3. If the operation is a write operation + * A. Redistribute any chunks being written by more than 1 + * MPI rank, such that the chunk is only owned by 1 MPI + * rank. The rank writing to the chunk which currently + * has the least amount of chunks assigned to it becomes + * the new owner (in the case of ties, the lowest MPI + * rank becomes the new owner) + * B. Participate in a collective read of chunks from the + * file + * C. Loop through each chunk selected in the operation + * and for each chunk: + * I. If we actually read the chunk from the file (if + * a chunk is being fully overwritten, we skip + * reading it), pass the chunk through the filter + * pipeline in reverse order (unfilter the chunk) * II. Update the chunk data with the modifications from - * the owning process + * the owning MPI rank * III. Receive any modification data from other - * processes and update the chunk data with these + * ranks and update the chunk data with those * modifications * IV. Filter the chunk - * B. Contribute the modified chunks to an array gathered - * by all processes which contains the new sizes of - * every chunk modified in the collective IO operation - * C. All processes collectively re-allocate each chunk - * from the gathered array with their new sizes after - * the filter operation - * D. If this process has any chunks selected in the IO - * operation, create an MPI derived type for memory and - * file to write out the process' selected chunks to the - * file - * E. Perform the collective write - * F. All processes collectively re-insert each modified + * D. Contribute the modified chunks to an array gathered + * by all ranks which contains information for + * re-allocating space in the file for every chunk + * modified. Then, each rank collectively re-allocates + * each chunk from the gathered array with their new + * sizes after the filter operation + * E. Proceed with the collective write operation for all + * the modified chunks + * F. Contribute the modified chunks to an array gathered + * by all ranks which contains information for + * re-inserting every chunk modified into the chunk + * index. Then, each rank collectively re-inserts each * chunk from the gathered array into the chunk index * + * TODO: Note that steps D. and F. here are both collective + * operations that partially share data from the + * H5D_filtered_collective_io_info_t structure. To + * try to conserve on memory a bit, the distributed + * arrays these operations create are discarded after + * each operation is performed. If memory consumption + * here proves to not be an issue, the necessary data + * for both operations could be combined into a single + * structure so that only one collective MPI operation + * is needed to carry out both operations, rather than + * two. * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Friday, Nov. 4th, 2016 - * *------------------------------------------------------------------------- */ static herr_t H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - H5D_chunk_map_t *fm) + H5D_chunk_map_t *fm, int mpi_rank, int mpi_size) { - H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */ - H5D_filtered_collective_io_info_t *collective_chunk_list = - NULL; /* The list of chunks used during collective operations */ - H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ - MPI_Datatype mem_type = MPI_BYTE; - MPI_Datatype file_type = MPI_BYTE; - hbool_t mem_type_is_derived = FALSE; - hbool_t file_type_is_derived = FALSE; - size_t chunk_list_num_entries; - size_t collective_chunk_list_num_entries; - size_t * num_chunks_selected_array = NULL; /* Array of number of chunks selected on each process */ - size_t i; /* Local index variable */ - int mpi_rank, mpi_size, mpi_code; - herr_t ret_value = SUCCEED; + H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */ + H5D_filtered_collective_io_info_t *chunk_hash_table = NULL; + unsigned char ** chunk_msg_bufs = NULL; + H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ + MPI_Datatype mem_type = MPI_BYTE; + MPI_Datatype file_type = MPI_BYTE; + hbool_t mem_type_is_derived = FALSE; + hbool_t file_type_is_derived = FALSE; + size_t * rank_chunks_assigned_map = NULL; + size_t chunk_list_num_entries; + size_t i; + int chunk_msg_bufs_len = 0; + int mpi_code; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -1358,11 +1713,12 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in HDassert(type_info); HDassert(fm); - /* Obtain the current rank of the process and the number of processes */ - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_DEBUG_VA(mpi_rank, "Performing Linked-chunk I/O (%s) with MPI Comm size of %d", + io_info->op_type == H5D_IO_OP_WRITE ? "write" : "read", mpi_size); + H5D_MPIO_TIME_START(mpi_rank, "Linked-chunk I/O"); +#endif /* Set the actual-chunk-opt-mode property. */ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_LINK_CHUNK); @@ -1373,123 +1729,127 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in H5CX_set_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE); /* Build a list of selected chunks in the collective io operation */ - if (H5D__construct_filtered_io_info_list(io_info, type_info, fm, &chunk_list, &chunk_list_num_entries) < - 0) + if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, type_info, fm, &chunk_list, + &chunk_list_num_entries, mpi_rank) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list") - if (io_info->op_type == H5D_IO_OP_WRITE) { /* Filtered collective write */ + if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */ + if (H5D__mpio_collective_filtered_chunk_read(chunk_list, chunk_list_num_entries, io_info, type_info, + mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks") + } + else { /* Filtered collective write */ H5D_chk_idx_info_t index_info; - H5D_chunk_ud_t udata; hsize_t mpi_buf_count; - /* Construct chunked index info */ - index_info.f = io_info->dset->oloc.file; - index_info.pline = &(io_info->dset->shared->dcpl_cache.pline); - index_info.layout = &(io_info->dset->shared->layout.u.chunk); - index_info.storage = &(io_info->dset->shared->layout.storage.u.chunk); - - /* Set up chunk information for insertion to chunk index */ - udata.common.layout = index_info.layout; - udata.common.storage = index_info.storage; - udata.filter_mask = 0; - - /* Iterate through all the chunks in the collective write operation, - * updating each chunk with the data modifications from other processes, - * then re-filtering the chunk. - */ - for (i = 0; i < chunk_list_num_entries; i++) - if (mpi_rank == chunk_list[i].owners.new_owner) - if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't process chunk entry") + H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info); - /* Gather the new chunk sizes to all processes for a collective reallocation - * of the chunks in the file. - */ - if (H5D__mpio_array_gatherv(chunk_list, chunk_list_num_entries, - sizeof(H5D_filtered_collective_io_info_t), - (void **)&collective_chunk_list, &collective_chunk_list_num_entries, true, - 0, io_info->comm, NULL) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes") - - /* Collectively re-allocate the modified chunks (from each process) in the file */ - for (i = 0; i < collective_chunk_list_num_entries; i++) { - hbool_t insert; - - if (H5D__chunk_file_alloc(&index_info, &collective_chunk_list[i].chunk_states.chunk_current, - &collective_chunk_list[i].chunk_states.new_chunk, &insert, - collective_chunk_list[i].scaled) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk") - } /* end for */ + if (mpi_size > 1) { + /* Redistribute shared chunks being written to */ + if (H5D__mpio_redistribute_shared_chunks(chunk_list, chunk_list_num_entries, io_info, fm, + mpi_rank, mpi_size, &rank_chunks_assigned_map) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks") - if (NULL == (num_chunks_selected_array = (size_t *)H5MM_malloc((size_t)mpi_size * sizeof(size_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate num chunks selected array") + /* Send any chunk modification messages for chunks this rank no longer owns */ + if (H5D__mpio_share_chunk_modification_data(chunk_list, &chunk_list_num_entries, io_info, + type_info, mpi_rank, mpi_size, &chunk_hash_table, + &chunk_msg_bufs, &chunk_msg_bufs_len) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "unable to send chunk modification data between MPI ranks") - if (MPI_SUCCESS != - (mpi_code = MPI_Allgather(&chunk_list_num_entries, 1, MPI_UNSIGNED_LONG_LONG, - num_chunks_selected_array, 1, MPI_UNSIGNED_LONG_LONG, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code) + /* Make sure the local chunk list was updated correctly */ + HDassert(chunk_list_num_entries == rank_chunks_assigned_map[mpi_rank]); + } - /* If this process has any chunks selected, create a MPI type for collectively - * writing out the chunks to file. Otherwise, the process contributes to the + /* Proceed to update all the chunks this rank owns with its own + * modification data and data from other ranks, before re-filtering + * the chunks. As chunk reads are done collectively here, all ranks + * must participate. + */ + if (H5D__mpio_collective_filtered_chunk_update(chunk_list, chunk_list_num_entries, chunk_hash_table, + chunk_msg_bufs, chunk_msg_bufs_len, io_info, type_info, + mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks") + + /* Free up resources used by chunk hash table now that we're done updating chunks */ + HASH_CLEAR(hh, chunk_hash_table); + + /* All ranks now collectively re-allocate file space for all chunks */ + if (H5D__mpio_collective_filtered_chunk_reallocate(chunk_list, chunk_list_num_entries, + rank_chunks_assigned_map, io_info, &index_info, + mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "couldn't collectively re-allocate file space for chunks") + + /* If this rank has any chunks selected, create a MPI type for collectively + * writing out the chunks to file. Otherwise, the rank contributes to the * collective write with a none type. */ - if (chunk_list_num_entries) { - size_t offset; - - /* During the collective re-allocation of chunks in the file, the record for each - * chunk is only updated in the collective array, not in the local copy of chunks on each - * process. However, each process needs the updated chunk records so that they can create - * a MPI type for the collective write that will write to the chunk's possible new locations - * in the file instead of the old ones. This ugly hack seems to be the best solution to - * copy the information back to the local array and avoid having to modify the collective - * write type function in an ugly way so that it will accept the collective array instead - * of the local array. This works correctly because the array gather function guarantees - * that the chunk data in the collective array is ordered in blocks by rank. - */ - for (i = 0, offset = 0; i < (size_t)mpi_rank; i++) - offset += num_chunks_selected_array[i]; - - H5MM_memcpy(chunk_list, &collective_chunk_list[offset], - num_chunks_selected_array[mpi_rank] * sizeof(H5D_filtered_collective_io_info_t)); + if (H5D__mpio_collective_filtered_io_type(chunk_list, chunk_list_num_entries, io_info->op_type, + &mem_type, &mem_type_is_derived, &file_type, + &file_type_is_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "couldn't create MPI type for writing filtered chunks") - /* Create single MPI type encompassing each selection in the dataspace */ - if (H5D__mpio_filtered_collective_write_type(chunk_list, chunk_list_num_entries, &mem_type, - &mem_type_is_derived, &file_type, - &file_type_is_derived) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_BADTYPE, FAIL, "couldn't create MPI link chunk I/O type") + mpi_buf_count = (file_type_is_derived || mem_type_is_derived) ? 1 : 0; - /* Override the write buffer to point to the address of the first - * chunk data buffer + /* Setup contig storage info for I/O operation */ + if (chunk_list_num_entries) { + /* + * Override the write buffer to point to the first + * chunk's data buffer */ io_info->u.wbuf = chunk_list[0].buf; - } /* end if */ - - /* We have a single, complicated MPI datatype for both memory & file */ - mpi_buf_count = (mem_type_is_derived && file_type_is_derived) ? (hsize_t)1 : (hsize_t)0; - /* Set up the base storage address for this operation */ - ctg_store.contig.dset_addr = 0; /* Write address must be set to address 0 */ - io_info->store = &ctg_store; + /* + * Setup the base storage address for this operation + * to be the first chunk's file address + */ + ctg_store.contig.dset_addr = chunk_list[0].chunk_new.offset; + } + else + ctg_store.contig.dset_addr = 0; /* Perform I/O */ + io_info->store = &ctg_store; if (H5D__final_collective_io(io_info, type_info, mpi_buf_count, file_type, mem_type) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish MPI-IO") + /* Free up resources in anticipation of following collective operation */ + for (i = 0; i < chunk_list_num_entries; i++) { + if (chunk_list[i].buf) { + H5MM_free(chunk_list[i].buf); + chunk_list[i].buf = NULL; + } + } + /* Participate in the collective re-insertion of all chunks modified - * in this iteration into the chunk index + * into the chunk index */ - for (i = 0; i < collective_chunk_list_num_entries; i++) { - udata.chunk_block = collective_chunk_list[i].chunk_states.new_chunk; - udata.common.scaled = collective_chunk_list[i].scaled; - udata.chunk_idx = collective_chunk_list[i].index; - - if ((index_info.storage->ops->insert)(&index_info, &udata, io_info->dset) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk address into index") - } /* end for */ - } /* end if */ + if (H5D__mpio_collective_filtered_chunk_reinsert(chunk_list, chunk_list_num_entries, + rank_chunks_assigned_map, io_info, &index_info, + mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "couldn't collectively re-insert modified chunks into chunk index") + } done: - /* Free resources used by a process which had some selection */ + /* Free the MPI buf and file types, if they were derived */ + if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + + if (chunk_msg_bufs) { + for (i = 0; i < (size_t)chunk_msg_bufs_len; i++) + H5MM_free(chunk_msg_bufs[i]); + + H5MM_free(chunk_msg_bufs); + } + + HASH_CLEAR(hh, chunk_hash_table); + + /* Free resources used by a rank which had some selection */ if (chunk_list) { for (i = 0; i < chunk_list_num_entries; i++) if (chunk_list[i].buf) @@ -1498,16 +1858,13 @@ done: H5MM_free(chunk_list); } /* end if */ - if (num_chunks_selected_array) - H5MM_free(num_chunks_selected_array); - if (collective_chunk_list) - H5MM_free(collective_chunk_list); + if (rank_chunks_assigned_map) + H5MM_free(rank_chunks_assigned_map); - /* Free the MPI buf and file types, if they were derived */ - if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__link_chunk_filtered_collective_io() */ @@ -1530,7 +1887,8 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm) +H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm, + int mpi_rank, int mpi_size) { H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ @@ -1543,11 +1901,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty H5FD_mpio_collective_opt_t last_coll_opt_mode = H5FD_MPIO_COLLECTIVE_IO; /* Last parallel transfer with independent IO or collective IO with this mode */ - size_t total_chunk; /* Total # of chunks in dataset */ -#ifdef H5Dmpio_DEBUG - int mpi_rank; -#endif - size_t u; /* Local index variable */ + size_t total_chunk; /* Total # of chunks in dataset */ + size_t u; /* Local index variable */ H5D_mpio_actual_io_mode_t actual_io_mode = H5D_MPIO_NO_COLLECTIVE; /* Local variable for tracking the I/O mode used. */ herr_t ret_value = SUCCEED; @@ -1557,10 +1912,6 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty /* Set the actual chunk opt mode property */ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_MULTI_CHUNK); -#ifdef H5Dmpio_DEBUG - mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file); -#endif - /* Retrieve total # of chunks in dataset */ H5_CHECKED_ASSIGN(total_chunk, size_t, fm->layout->u.chunk.nchunks, hsize_t); HDassert(total_chunk != 0); @@ -1568,13 +1919,13 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty /* Allocate memories */ chunk_io_option = (uint8_t *)H5MM_calloc(total_chunk); chunk_addr = (haddr_t *)H5MM_calloc(total_chunk * sizeof(haddr_t)); -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "total_chunk %zu\n", total_chunk); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "total_chunk %zu", total_chunk); #endif /* Obtain IO option for each chunk */ - if (H5D__obtain_mpio_mode(io_info, fm, chunk_io_option, chunk_addr) < 0) + if (H5D__obtain_mpio_mode(io_info, fm, chunk_io_option, chunk_addr, mpi_rank, mpi_size) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTRECV, FAIL, "unable to obtain MPIO mode") /* Set up contiguous I/O info object */ @@ -1602,9 +1953,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty H5S_t * fspace; /* Dataspace describing chunk & selection in it */ H5S_t * mspace; /* Dataspace describing selection in memory corresponding to this chunk */ -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "mpi_rank = %d, chunk index = %zu\n", mpi_rank, u); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "mpi_rank = %d, chunk index = %zu", mpi_rank, u); #endif /* Get the chunk info for this chunk, if there are elements selected */ chunk_info = fm->select_chunk[u]; @@ -1622,10 +1972,9 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty * needs to contribute MPI NONE TYPE. */ if (chunk_io_option[u] == H5D_CHUNK_IO_MODE_COL) { -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "inside collective chunk IO mpi_rank = %d, chunk index = %zu\n", - mpi_rank, u); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "inside collective chunk IO mpi_rank = %d, chunk index = %zu", + mpi_rank, u); #endif /* Set the file & memory dataspaces */ @@ -1661,10 +2010,9 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish shared collective MPI-IO") } /* end if */ else { /* possible independent IO for this chunk */ -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "inside independent IO mpi_rank = %d, chunk index = %zu\n", mpi_rank, - u); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "inside independent IO mpi_rank = %d, chunk index = %zu", mpi_rank, + u); #endif HDassert(chunk_io_option[u] == 0); @@ -1694,9 +2042,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty /* Perform the I/O */ if (H5D__inter_collective_io(&ctg_io_info, type_info, fspace, mspace) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish shared collective MPI-IO") -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "after inter collective IO\n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "after inter collective IO"); #endif } /* end else */ } /* end for */ @@ -1716,80 +2063,101 @@ done: /*------------------------------------------------------------------------- * Function: H5D__multi_chunk_filtered_collective_io * - * Purpose: To do filtered collective IO iteratively to save on memory. - * While link_chunk_filtered_collective_io will construct and - * work on a list of all of the chunks selected in the IO - * operation at once, this function works iteratively on a set - * of chunks at a time; at most one chunk per rank per - * iteration. - * - * 1. Construct a list of selected chunks in the collective IO - * operation - * A. If any chunk is being written to by more than 1 - * process, the process writing to the chunk which - * currently has the least amount of chunks assigned - * to it becomes the new owner (in the case of ties, - * the lowest MPI rank becomes the new owner) - * 2. If the operation is a read operation - * A. Loop through each chunk in the operation - * I. Read the chunk from the file - * II. Unfilter the chunk - * III. Scatter the read chunk data to the user's buffer - * 3. If the operation is a write operation - * A. Loop through each chunk in the operation - * I. If this is not a full overwrite of the chunk - * a) Read the chunk from file and pass the chunk - * through the filter pipeline in reverse order - * (Unfilter the chunk) - * II. Update the chunk data with the modifications from - * the owning process - * III. Receive any modification data from other - * processes and update the chunk data with these - * modifications - * IV. Filter the chunk - * V. Contribute the chunk to an array gathered by - * all processes which contains every chunk - * modified in this iteration (up to one chunk - * per process, some processes may not have a - * selection/may have less chunks to work on than - * other processes) - * VI. All processes collectively re-allocate each - * chunk from the gathered array with their new - * sizes after the filter operation - * VII. Proceed with the collective write operation - * for the chunks modified on this iteration - * VIII. All processes collectively re-insert each - * chunk from the gathered array into the chunk - * index + * Purpose: Performs collective I/O on filtered chunks iteratively to + * save on memory and potentially get better performance + * depending on the average number of chunks per rank. While + * linked-chunk I/O will construct and work on a list of all + * of the chunks selected in the I/O operation at once, this + * function works iteratively on a set of chunks at a time; at + * most one chunk per rank per iteration. The general + * algorithm is as follows: + * + * 1. Construct a list of selected chunks in the collective + * I/O operation + * 2. If the operation is a read operation, loop an amount of + * times equal to the maximum number of chunks selected on + * any particular rank and on each iteration: + * A. Participate in a collective read of chunks from + * the file (ranks that run out of chunks still need + * to participate) + * B. Unfilter the chunk that was read (if any) + * C. Scatter the read chunk's data to the application's + * read buffer + * 3. If the operation is a write operation, redistribute any + * chunks being written to by more than 1 MPI rank, such + * that the chunk is only owned by 1 MPI rank. The rank + * writing to the chunk which currently has the least + * amount of chunks assigned to it becomes the new owner + * (in the case of ties, the lowest MPI rank becomes the + * new owner). Then, loop an amount of times equal to the + * maximum number of chunks selected on any particular + * rank and on each iteration: + * A. Participate in a collective read of chunks from + * the file (ranks that run out of chunks still need + * to participate) + * I. If we actually read a chunk from the file (if + * a chunk is being fully overwritten, we skip + * reading it), pass the chunk through the filter + * pipeline in reverse order (unfilter the chunk) + * B. Update the chunk data with the modifications from + * the owning rank + * C. Receive any modification data from other ranks and + * update the chunk data with those modifications + * D. Filter the chunk + * E. Contribute the chunk to an array gathered by + * all ranks which contains information for + * re-allocating space in the file for every chunk + * modified in this iteration (up to one chunk per + * rank; some ranks may not have a selection/may have + * less chunks to work on than other ranks). Then, + * each rank collectively re-allocates each chunk + * from the gathered array with their new sizes + * after the filter operation + * F. Proceed with the collective write operation + * for the chunks modified on this iteration + * G. Contribute the chunk to an array gathered by + * all ranks which contains information for + * re-inserting every chunk modified on this + * iteration into the chunk index. Then, each rank + * collectively re-inserts each chunk from the + * gathered array into the chunk index + * + * TODO: Note that steps E. and G. here are both collective + * operations that partially share data from the + * H5D_filtered_collective_io_info_t structure. To + * try to conserve on memory a bit, the distributed + * arrays these operations create are discarded after + * each operation is performed. If memory consumption + * here proves to not be an issue, the necessary data + * for both operations could be combined into a single + * structure so that only one collective MPI operation + * is needed to carry out both operations, rather than + * two. * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Friday, Dec. 2nd, 2016 - * *------------------------------------------------------------------------- */ static herr_t H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - H5D_chunk_map_t *fm) + H5D_chunk_map_t *fm, int mpi_rank, int mpi_size) { - H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */ - H5D_filtered_collective_io_info_t *collective_chunk_list = - NULL; /* The list of chunks used during collective operations */ - H5D_storage_t store; /* union of EFL and chunk pointer in file space */ - H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ - H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ - MPI_Datatype *file_type_array = NULL; - MPI_Datatype *mem_type_array = NULL; - hbool_t * file_type_is_derived_array = NULL; - hbool_t * mem_type_is_derived_array = NULL; - hbool_t * has_chunk_selected_array = - NULL; /* Array of whether or not each process is contributing a chunk to each iteration */ - size_t chunk_list_num_entries; - size_t collective_chunk_list_num_entries; - size_t i, j; /* Local index variable */ - int mpi_rank, mpi_size, mpi_code; - herr_t ret_value = SUCCEED; + H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */ + H5D_filtered_collective_io_info_t *chunk_hash_table = NULL; + unsigned char ** chunk_msg_bufs = NULL; + H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ + H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ + MPI_Datatype mem_type = MPI_BYTE; + MPI_Datatype file_type = MPI_BYTE; + hbool_t mem_type_is_derived = FALSE; + hbool_t file_type_is_derived = FALSE; + hbool_t have_chunk_to_process; + size_t chunk_list_num_entries; + size_t i; + size_t max_num_chunks; + int chunk_msg_bufs_len = 0; + int mpi_code; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -1797,11 +2165,12 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i HDassert(type_info); HDassert(fm); - /* Obtain the current rank of the process and the number of processes */ - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_DEBUG_VA(mpi_rank, "Performing Multi-chunk I/O (%s) with MPI Comm size of %d", + io_info->op_type == H5D_IO_OP_WRITE ? "write" : "read", mpi_size); + H5D_MPIO_TIME_START(mpi_rank, "Multi-chunk I/O"); +#endif /* Set the actual chunk opt mode property */ H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_MULTI_CHUNK); @@ -1812,10 +2181,19 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i H5CX_set_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE); /* Build a list of selected chunks in the collective IO operation */ - if (H5D__construct_filtered_io_info_list(io_info, type_info, fm, &chunk_list, &chunk_list_num_entries) < - 0) + if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, type_info, fm, &chunk_list, + &chunk_list_num_entries, mpi_rank) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list") + /* Retrieve the maximum number of chunks selected for any rank */ + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&chunk_list_num_entries, &max_num_chunks, 1, + MPI_UNSIGNED_LONG_LONG, MPI_MAX, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) + + /* If no one has anything selected at all, end the operation */ + if (0 == max_num_chunks) + HGOTO_DONE(SUCCEED); + /* Set up contiguous I/O info object */ H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); ctg_io_info.store = &ctg_store; @@ -1823,190 +2201,147 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i /* Initialize temporary contiguous storage info */ ctg_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size; - ctg_store.contig.dset_addr = 0; - - /* Set dataset storage for I/O info */ - io_info->store = &store; if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */ - for (i = 0; i < chunk_list_num_entries; i++) - if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't process chunk entry") - } /* end if */ + for (i = 0; i < max_num_chunks; i++) { + /* Check if this rank has a chunk to work on for this iteration */ + have_chunk_to_process = (i < chunk_list_num_entries); + + if (H5D__mpio_collective_filtered_chunk_read(have_chunk_to_process ? &chunk_list[i] : NULL, + have_chunk_to_process ? 1 : 0, io_info, type_info, + mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks") + + if (have_chunk_to_process && chunk_list[i].buf) { + H5MM_free(chunk_list[i].buf); + chunk_list[i].buf = NULL; + } + } + } else { /* Filtered collective write */ H5D_chk_idx_info_t index_info; - H5D_chunk_ud_t udata; - size_t max_num_chunks; hsize_t mpi_buf_count; /* Construct chunked index info */ - index_info.f = io_info->dset->oloc.file; - index_info.pline = &(io_info->dset->shared->dcpl_cache.pline); - index_info.layout = &(io_info->dset->shared->layout.u.chunk); - index_info.storage = &(io_info->dset->shared->layout.storage.u.chunk); - - /* Set up chunk information for insertion to chunk index */ - udata.common.layout = index_info.layout; - udata.common.storage = index_info.storage; - udata.filter_mask = 0; - - /* Retrieve the maximum number of chunks being written among all processes */ - if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&chunk_list_num_entries, &max_num_chunks, 1, - MPI_UNSIGNED_LONG_LONG, MPI_MAX, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code) - - /* If no one is writing anything at all, end the operation */ - if (!(max_num_chunks > 0)) - HGOTO_DONE(SUCCEED); - - /* Allocate arrays for storing MPI file and mem types and whether or not the - * types were derived. - */ - if (NULL == (file_type_array = (MPI_Datatype *)H5MM_malloc(max_num_chunks * sizeof(MPI_Datatype)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file type array") - - if (NULL == (file_type_is_derived_array = (hbool_t *)H5MM_calloc(max_num_chunks * sizeof(hbool_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file type is derived array") - - if (NULL == (mem_type_array = (MPI_Datatype *)H5MM_malloc(max_num_chunks * sizeof(MPI_Datatype)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate mem type array") - - if (NULL == (mem_type_is_derived_array = (hbool_t *)H5MM_calloc(max_num_chunks * sizeof(hbool_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate mem type is derived array") + H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info); + + if (mpi_size > 1) { + /* Redistribute shared chunks being written to */ + if (H5D__mpio_redistribute_shared_chunks(chunk_list, chunk_list_num_entries, io_info, fm, + mpi_rank, mpi_size, NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks") + + /* Send any chunk modification messages for chunks this rank no longer owns */ + if (H5D__mpio_share_chunk_modification_data(chunk_list, &chunk_list_num_entries, io_info, + type_info, mpi_rank, mpi_size, &chunk_hash_table, + &chunk_msg_bufs, &chunk_msg_bufs_len) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "unable to send chunk modification data between MPI ranks") + } - /* Iterate over the max number of chunks among all processes, as this process could - * have no chunks left to work on, but it still needs to participate in the collective - * re-allocation and re-insertion of chunks modified by other processes. + /* Iterate over the max number of chunks among all ranks, as this rank could + * have no chunks left to work on, but it still needs to participate in the + * collective re-allocation and re-insertion of chunks modified by other ranks. */ for (i = 0; i < max_num_chunks; i++) { - /* Check if this process has a chunk to work on for this iteration */ - hbool_t have_chunk_to_process = - (i < chunk_list_num_entries) && (mpi_rank == chunk_list[i].owners.new_owner); + /* Check if this rank has a chunk to work on for this iteration */ + have_chunk_to_process = (i < chunk_list_num_entries) && (mpi_rank == chunk_list[i].new_owner); - if (have_chunk_to_process) - if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't process chunk entry") - - /* Gather the new chunk sizes to all processes for a collective re-allocation - * of the chunks in the file - */ - if (H5D__mpio_array_gatherv(&chunk_list[i], have_chunk_to_process ? 1 : 0, - sizeof(H5D_filtered_collective_io_info_t), - (void **)&collective_chunk_list, &collective_chunk_list_num_entries, - true, 0, io_info->comm, NULL) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes") - - /* Participate in the collective re-allocation of all chunks modified - * in this iteration. + /* Proceed to update the chunk this rank owns (if any left) with its + * own modification data and data from other ranks, before re-filtering + * the chunks. As chunk reads are done collectively here, all ranks + * must participate. */ - for (j = 0; j < collective_chunk_list_num_entries; j++) { - hbool_t insert = FALSE; - - if (H5D__chunk_file_alloc(&index_info, &collective_chunk_list[j].chunk_states.chunk_current, - &collective_chunk_list[j].chunk_states.new_chunk, &insert, - chunk_list[j].scaled) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk") - } /* end for */ - - if (NULL == - (has_chunk_selected_array = (hbool_t *)H5MM_malloc((size_t)mpi_size * sizeof(hbool_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate num chunks selected array") + if (H5D__mpio_collective_filtered_chunk_update(have_chunk_to_process ? &chunk_list[i] : NULL, + have_chunk_to_process ? 1 : 0, chunk_hash_table, + chunk_msg_bufs, chunk_msg_bufs_len, io_info, + type_info, mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks") + + /* All ranks now collectively re-allocate file space for all chunks */ + if (H5D__mpio_collective_filtered_chunk_reallocate(have_chunk_to_process ? &chunk_list[i] : NULL, + have_chunk_to_process ? 1 : 0, NULL, io_info, + &index_info, mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "couldn't collectively re-allocate file space for chunks") - if (MPI_SUCCESS != - (mpi_code = MPI_Allgather(&have_chunk_to_process, 1, MPI_C_BOOL, has_chunk_selected_array, 1, - MPI_C_BOOL, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code) - - /* If this process has a chunk to work on, create a MPI type for the - * memory and file for writing out the chunk + /* + * If this rank has a chunk to work on, create a MPI type + * for writing out the chunk. Otherwise, the rank will + * use MPI_BYTE for the file and memory type and specify + * a count of 0. */ - if (have_chunk_to_process) { - size_t offset; - int mpi_type_count; - - for (j = 0, offset = 0; j < (size_t)mpi_rank; j++) - offset += has_chunk_selected_array[j]; + if (H5D__mpio_collective_filtered_io_type( + have_chunk_to_process ? &chunk_list[i] : NULL, have_chunk_to_process ? 1 : 0, + io_info->op_type, &mem_type, &mem_type_is_derived, &file_type, &file_type_is_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "couldn't create MPI type for writing filtered chunks") - /* Collect the new chunk info back to the local copy, since only the record in the - * collective array gets updated by the chunk re-allocation */ - H5MM_memcpy(&chunk_list[i].chunk_states.new_chunk, - &collective_chunk_list[offset].chunk_states.new_chunk, - sizeof(chunk_list[i].chunk_states.new_chunk)); + mpi_buf_count = (file_type_is_derived || mem_type_is_derived) ? 1 : 0; - H5_CHECKED_ASSIGN(mpi_type_count, int, chunk_list[i].chunk_states.new_chunk.length, hsize_t); - - /* Create MPI memory type for writing to chunk */ - if (MPI_SUCCESS != - (mpi_code = MPI_Type_contiguous(mpi_type_count, MPI_BYTE, &mem_type_array[i]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type_array[i]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - mem_type_is_derived_array[i] = TRUE; - - /* Create MPI file type for writing to chunk */ - if (MPI_SUCCESS != - (mpi_code = MPI_Type_contiguous(mpi_type_count, MPI_BYTE, &file_type_array[i]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type_array[i]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - file_type_is_derived_array[i] = TRUE; - - mpi_buf_count = 1; - - /* Set up the base storage address for this operation */ - ctg_store.contig.dset_addr = chunk_list[i].chunk_states.new_chunk.offset; - - /* Override the write buffer to point to the address of the - * chunk data buffer + /* Override the write buffer to point to the chunk data buffer */ + if (have_chunk_to_process) { + /* + * Override the write buffer to point to the + * chunk's data buffer */ ctg_io_info.u.wbuf = chunk_list[i].buf; - } /* end if */ - else { - mem_type_array[i] = file_type_array[i] = MPI_BYTE; - mpi_buf_count = 0; - } /* end else */ + + /* + * Setup the base storage address for this + * operation to be the chunk's file address + */ + ctg_store.contig.dset_addr = chunk_list[i].chunk_new.offset; + } + else + ctg_store.contig.dset_addr = 0; /* Perform the I/O */ - if (H5D__final_collective_io(&ctg_io_info, type_info, mpi_buf_count, file_type_array[i], - mem_type_array[i]) < 0) + if (H5D__final_collective_io(&ctg_io_info, type_info, mpi_buf_count, file_type, mem_type) < 0) HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish MPI-IO") + /* Free up resources in anticipation of following collective operation */ + if (have_chunk_to_process && chunk_list[i].buf) { + H5MM_free(chunk_list[i].buf); + chunk_list[i].buf = NULL; + } + /* Participate in the collective re-insertion of all chunks modified * in this iteration into the chunk index */ - for (j = 0; j < collective_chunk_list_num_entries; j++) { - udata.chunk_block = collective_chunk_list[j].chunk_states.new_chunk; - udata.common.scaled = collective_chunk_list[j].scaled; - udata.chunk_idx = collective_chunk_list[j].index; - - if ((index_info.storage->ops->insert)(&index_info, &udata, io_info->dset) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, - "unable to insert chunk address into index") - } /* end for */ + if (H5D__mpio_collective_filtered_chunk_reinsert(have_chunk_to_process ? &chunk_list[i] : NULL, + have_chunk_to_process ? 1 : 0, NULL, io_info, + &index_info, mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "couldn't collectively re-insert modified chunks into chunk index") + + /* Free the MPI types, if they were derived */ + if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + mem_type_is_derived = FALSE; + if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + file_type_is_derived = FALSE; + } /* end for */ + } - if (collective_chunk_list) { - H5MM_free(collective_chunk_list); - collective_chunk_list = NULL; - } /* end if */ - if (has_chunk_selected_array) { - H5MM_free(has_chunk_selected_array); - has_chunk_selected_array = NULL; - } /* end if */ - } /* end for */ +done: + /* Free the MPI buf and file types, if they were derived */ + if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - /* Free the MPI file and memory types, if they were derived */ - for (i = 0; i < max_num_chunks; i++) { - if (file_type_is_derived_array[i]) - if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type_array[i]))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + if (chunk_msg_bufs) { + for (i = 0; i < (size_t)chunk_msg_bufs_len; i++) + H5MM_free(chunk_msg_bufs[i]); - if (mem_type_is_derived_array[i]) - if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type_array[i]))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - } /* end for */ - } /* end else */ + H5MM_free(chunk_msg_bufs); + } -done: + HASH_CLEAR(hh, chunk_hash_table); + + /* Free resources used by a rank which had some selection */ if (chunk_list) { for (i = 0; i < chunk_list_num_entries; i++) if (chunk_list[i].buf) @@ -2015,16 +2350,10 @@ done: H5MM_free(chunk_list); } /* end if */ - if (collective_chunk_list) - H5MM_free(collective_chunk_list); - if (file_type_array) - H5MM_free(file_type_array); - if (mem_type_array) - H5MM_free(mem_type_array); - if (file_type_is_derived_array) - H5MM_free(file_type_is_derived_array); - if (mem_type_is_derived_array) - H5MM_free(mem_type_is_derived_array); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__multi_chunk_filtered_collective_io() */ @@ -2050,11 +2379,22 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf hbool_t mbt_is_derived = FALSE; hbool_t mft_is_derived = FALSE; MPI_Datatype mpi_file_type, mpi_buf_type; - int mpi_code; /* MPI return code */ - herr_t ret_value = SUCCEED; /* return value */ + int mpi_code; /* MPI return code */ +#ifdef H5Dmpio_DEBUG + int mpi_rank; +#endif + herr_t ret_value = SUCCEED; /* return value */ FUNC_ENTER_STATIC +#ifdef H5Dmpio_DEBUG + mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file); + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Inter collective I/O"); + if (mpi_rank < 0) + HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank") +#endif + if ((file_space != NULL) && (mem_space != NULL)) { int mpi_file_count; /* Number of file "objects" to transfer */ hsize_t *permute_map = NULL; /* array that holds the mapping from the old, @@ -2113,9 +2453,8 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf mft_is_derived = FALSE; } /* end else */ -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before final collective IO \n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "before final collective I/O"); #endif /* Perform final collective I/O operation */ @@ -2129,9 +2468,10 @@ done: if (mft_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mpi_file_type))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before leaving inter_collective_io ret_value = %d\n", ret_value); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_DEBUG_VA(mpi_rank, "before leaving inter_collective_io ret_value = %d", ret_value); + H5D_MPIO_TRACE_EXIT(mpi_rank); #endif FUNC_LEAVE_NOAPI(ret_value) @@ -2153,10 +2493,21 @@ static herr_t H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t mpi_buf_count, MPI_Datatype mpi_file_type, MPI_Datatype mpi_buf_type) { +#ifdef H5Dmpio_DEBUG + int mpi_rank; +#endif herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC +#ifdef H5Dmpio_DEBUG + mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file); + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Final collective I/O"); + if (mpi_rank < 0) + HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank") +#endif + /* Pass buf type, file type to the file driver. */ if (H5CX_set_mpi_coll_datatypes(mpi_buf_type, mpi_file_type) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O collective I/O datatypes") @@ -2171,10 +2522,12 @@ H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf } /* end else */ done: -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "ret_value before leaving final_collective_io=%d\n", ret_value); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_DEBUG_VA(mpi_rank, "ret_value before leaving final_collective_io=%d", ret_value); + H5D_MPIO_TRACE_EXIT(mpi_rank); #endif + FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__final_collective_io */ @@ -2216,62 +2569,149 @@ H5D__cmp_chunk_addr(const void *chunk_addr_info1, const void *chunk_addr_info2) * * Return: -1, 0, 1 * - * Programmer: Jordan Henderson - * Wednesday, Nov. 30th, 2016 - * *------------------------------------------------------------------------- */ static int H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_info_entry1, const void *filtered_collective_io_info_entry2) { - haddr_t addr1 = HADDR_UNDEF, addr2 = HADDR_UNDEF; + const H5D_filtered_collective_io_info_t *entry1; + const H5D_filtered_collective_io_info_t *entry2; + haddr_t addr1 = HADDR_UNDEF; + haddr_t addr2 = HADDR_UNDEF; + int ret_value; FUNC_ENTER_STATIC_NOERR - addr1 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1) - ->chunk_states.new_chunk.offset; - addr2 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2) - ->chunk_states.new_chunk.offset; + entry1 = (const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1; + entry2 = (const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2; - FUNC_LEAVE_NOAPI(H5F_addr_cmp(addr1, addr2)) -} /* end H5D__cmp_filtered_collective_io_info_entry() */ + addr1 = entry1->chunk_new.offset; + addr2 = entry2->chunk_new.offset; -#if MPI_VERSION >= 3 + /* + * If both chunk addresses are defined, H5F_addr_cmp is safe to use. + * Otherwise, if both addresses aren't defined, compared chunk + * entries based on their chunk index. Finally, if only one chunk + * address is defined, return the appropriate value based on which + * is defined. + */ + if (H5F_addr_defined(addr1) && H5F_addr_defined(addr2)) { + ret_value = H5F_addr_cmp(addr1, addr2); + } + else if (!H5F_addr_defined(addr1) && !H5F_addr_defined(addr2)) { + hsize_t chunk_idx1 = entry1->index_info.chunk_idx; + hsize_t chunk_idx2 = entry2->index_info.chunk_idx; + + ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2); + } + else + ret_value = H5F_addr_defined(addr1) ? 1 : -1; + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__cmp_filtered_collective_io_info_entry() */ /*------------------------------------------------------------------------- - * Function: H5D__cmp_filtered_collective_io_info_entry_owner + * Function: H5D__cmp_chunk_redistribute_info * - * Purpose: Routine to compare filtered collective chunk io info - * entries's original owner fields + * Purpose: Routine to compare two H5D_chunk_redistribute_info_t + * structures * - * Description: Callback for qsort() to compare filtered collective chunk - * io info entries's original owner fields + * Description: Callback for qsort() to compare two + * H5D_chunk_redistribute_info_t structures + * + * Return: -1, 0, 1 + * + *------------------------------------------------------------------------- + */ +static int +H5D__cmp_chunk_redistribute_info(const void *_entry1, const void *_entry2) +{ + const H5D_chunk_redistribute_info_t *entry1; + const H5D_chunk_redistribute_info_t *entry2; + hsize_t chunk_index1; + hsize_t chunk_index2; + int ret_value; + + FUNC_ENTER_STATIC_NOERR + + entry1 = (const H5D_chunk_redistribute_info_t *)_entry1; + entry2 = (const H5D_chunk_redistribute_info_t *)_entry2; + + chunk_index1 = entry1->chunk_idx; + chunk_index2 = entry2->chunk_idx; + + if (chunk_index1 == chunk_index2) { + int orig_owner1 = entry1->orig_owner; + int orig_owner2 = entry2->orig_owner; + + ret_value = (orig_owner1 > orig_owner2) - (orig_owner1 < orig_owner2); + } + else + ret_value = (chunk_index1 > chunk_index2) - (chunk_index1 < chunk_index2); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__cmp_chunk_redistribute_info() */ + +/*------------------------------------------------------------------------- + * Function: H5D__cmp_chunk_redistribute_info_orig_owner * - * Return: The difference between the two - * H5D_filtered_collective_io_info_t's original owner fields + * Purpose: Routine to compare the original owning MPI rank for two + * H5D_chunk_redistribute_info_t structures * - * Programmer: Jordan Henderson - * Monday, Apr. 10th, 2017 + * Description: Callback for qsort() to compare the original owning MPI + * rank for two H5D_chunk_redistribute_info_t + * structures + * + * Return: -1, 0, 1 * *------------------------------------------------------------------------- */ static int -H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective_io_info_entry1, - const void *filtered_collective_io_info_entry2) +H5D__cmp_chunk_redistribute_info_orig_owner(const void *_entry1, const void *_entry2) { - int owner1 = -1, owner2 = -1; + const H5D_chunk_redistribute_info_t *entry1; + const H5D_chunk_redistribute_info_t *entry2; + int owner1 = -1; + int owner2 = -1; + int ret_value; FUNC_ENTER_STATIC_NOERR - owner1 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1) - ->owners.original_owner; - owner2 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2) - ->owners.original_owner; + entry1 = (const H5D_chunk_redistribute_info_t *)_entry1; + entry2 = (const H5D_chunk_redistribute_info_t *)_entry2; - FUNC_LEAVE_NOAPI(owner1 - owner2) -} /* end H5D__cmp_filtered_collective_io_info_entry_owner() */ -#endif + owner1 = entry1->orig_owner; + owner2 = entry2->orig_owner; + + if (owner1 == owner2) { + haddr_t addr1 = entry1->chunk_block.offset; + haddr_t addr2 = entry2->chunk_block.offset; + + /* + * If both chunk addresses are defined, H5F_addr_cmp is safe to use. + * Otherwise, if both addresses aren't defined, compared chunk + * entries based on their chunk index. Finally, if only one chunk + * address is defined, return the appropriate value based on which + * is defined. + */ + if (H5F_addr_defined(addr1) && H5F_addr_defined(addr2)) { + ret_value = H5F_addr_cmp(addr1, addr2); + } + else if (!H5F_addr_defined(addr1) && !H5F_addr_defined(addr2)) { + hsize_t chunk_idx1 = entry1->chunk_idx; + hsize_t chunk_idx2 = entry2->chunk_idx; + + ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2); + } + else + ret_value = H5F_addr_defined(addr1) ? 1 : -1; + } + else + ret_value = (owner1 > owner2) - (owner1 < owner2); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__cmp_chunk_redistribute_info_orig_owner() */ /*------------------------------------------------------------------------- * Function: H5D__sort_chunk @@ -2300,7 +2740,7 @@ H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective */ static herr_t H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, - H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk) + H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk, int mpi_rank, int mpi_size) { H5SL_node_t * chunk_node; /* Current node in chunk skip list */ H5D_chunk_info_t *chunk_info; /* Current chunking info. of this node. */ @@ -2312,17 +2752,12 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, hbool_t do_sort = FALSE; /* Whether the addresses need to be sorted */ int bsearch_coll_chunk_threshold; int many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND; - int mpi_size; /* Number of MPI processes */ int mpi_code; /* MPI return code */ int i; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC - /* Retrieve # of MPI processes */ - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") - /* Calculate the actual threshold to obtain all chunk addresses collectively * The bigger this number is, the more possible the use of obtaining chunk * address collectively. @@ -2336,28 +2771,20 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, ((sum_chunk / mpi_size) >= H5D_ALL_CHUNK_ADDR_THRES_COL_NUM)) many_chunk_opt = H5D_OBTAIN_ALL_CHUNK_ADDR_COL; -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "many_chunk_opt= %d\n", many_chunk_opt); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG_VA(mpi_rank, "many_chunk_opt = %d", many_chunk_opt); #endif /* If we need to optimize the way to obtain the chunk address */ if (many_chunk_opt != H5D_OBTAIN_ONE_CHUNK_ADDR_IND) { - int mpi_rank; - -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "Coming inside H5D_OBTAIN_ALL_CHUNK_ADDR_COL\n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "Coming inside H5D_OBTAIN_ALL_CHUNK_ADDR_COL"); #endif /* Allocate array for chunk addresses */ if (NULL == (total_chunk_addr_array = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)fm->layout->u.chunk.nchunks))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate memory chunk address array") - /* Retrieve all the chunk addresses with process 0 */ - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") - if (mpi_rank == 0) { herr_t result; @@ -2437,10 +2864,10 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, chunk_node = H5SL_next(chunk_node); } /* end while */ -#ifdef H5D_DEBUG - if (H5DEBUG(D)) - HDfprintf(H5DEBUG(D), "before Qsort\n"); +#ifdef H5Dmpio_DEBUG + H5D_MPIO_DEBUG(mpi_rank, "before Qsort"); #endif + if (do_sort) { size_t num_chunks = H5SL_count(fm->sel_chunks); @@ -2497,7 +2924,7 @@ done: */ static herr_t H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[], - haddr_t chunk_addr[]) + haddr_t chunk_addr[], int mpi_rank, int mpi_size) { size_t total_chunks; unsigned percent_nproc_per_chunk, threshold_nproc_per_chunk; @@ -2510,7 +2937,6 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig H5P_coll_md_read_flag_t md_reads_file_flag; hbool_t md_reads_context_flag; hbool_t restore_md_reads_state = FALSE; - int mpi_size, mpi_rank; MPI_Comm comm; int root; size_t ic; @@ -2523,12 +2949,6 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig root = 0; comm = io_info->comm; - /* Obtain the number of process and the current rank of the process */ - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") - /* Setup parameters */ H5_CHECKED_ASSIGN(total_chunks, size_t, fm->layout->u.chunk.nchunks, hsize_t); if (H5CX_get_mpio_chunk_opt_ratio(&percent_nproc_per_chunk) < 0) @@ -2672,34 +3092,32 @@ done: } /* end H5D__obtain_mpio_mode() */ /*------------------------------------------------------------------------- - * Function: H5D__construct_filtered_io_info_list + * Function: H5D__mpio_collective_filtered_chunk_io_setup * * Purpose: Constructs a list of entries which contain the necessary * information for inter-process communication when performing * collective io on filtered chunks. This list is used by - * each process when performing I/O on locally selected chunks - * and also in operations that must be collectively done - * on every chunk, such as chunk re-allocation, insertion of - * chunks into the chunk index, etc. + * each MPI rank when performing I/O on locally selected + * chunks and also in operations that must be collectively + * done on every chunk, such as chunk re-allocation, insertion + * of chunks into the chunk index, etc. * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Tuesday, January 10th, 2017 - * *------------------------------------------------------------------------- */ static herr_t -H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - const H5D_chunk_map_t * fm, - H5D_filtered_collective_io_info_t **chunk_list, size_t *num_entries) +H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, + const H5D_chunk_map_t * fm, + H5D_filtered_collective_io_info_t **chunk_list, + size_t *num_entries, int mpi_rank) { - H5D_filtered_collective_io_info_t *local_info_array = - NULL; /* The list of initially selected chunks for this process */ - size_t num_chunks_selected; - size_t i; - int mpi_rank; - herr_t ret_value = SUCCEED; + H5D_filtered_collective_io_info_t *local_info_array = NULL; + H5D_chunk_ud_t udata; + hbool_t filter_partial_edge_chunks; + size_t num_chunks_selected; + size_t i; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -2709,19 +3127,23 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ HDassert(chunk_list); HDassert(num_entries); - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Filtered Collective I/O Setup"); +#endif - /* Each process builds a local list of the chunks they have selected */ + /* Each rank builds a local list of the chunks they have selected */ if ((num_chunks_selected = H5SL_count(fm->sel_chunks))) { H5D_chunk_info_t *chunk_info; - H5D_chunk_ud_t udata; H5SL_node_t * chunk_node; hsize_t select_npoints; - hssize_t chunk_npoints; + hbool_t need_sort = FALSE; + + /* Determine whether partial edge chunks should be filtered */ + filter_partial_edge_chunks = !(io_info->dset->shared->layout.u.chunk.flags & + H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS); - if (NULL == (local_info_array = (H5D_filtered_collective_io_info_t *)H5MM_malloc( - num_chunks_selected * sizeof(H5D_filtered_collective_io_info_t)))) + if (NULL == (local_info_array = H5MM_malloc(num_chunks_selected * sizeof(*local_info_array)))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate local io info array buffer") chunk_node = H5SL_first(fm->sel_chunks); @@ -2732,275 +3154,787 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") - local_info_array[i].index = chunk_info->index; - local_info_array[i].chunk_states.chunk_current = local_info_array[i].chunk_states.new_chunk = - udata.chunk_block; - local_info_array[i].num_writers = 0; - local_info_array[i].owners.original_owner = local_info_array[i].owners.new_owner = mpi_rank; - local_info_array[i].buf = NULL; - - local_info_array[i].async_info.num_receive_requests = 0; - local_info_array[i].async_info.receive_buffer_array = NULL; - local_info_array[i].async_info.receive_requests_array = NULL; - - H5MM_memcpy(local_info_array[i].scaled, chunk_info->scaled, sizeof(chunk_info->scaled)); - - select_npoints = H5S_GET_SELECT_NPOINTS(chunk_info->mspace); - local_info_array[i].io_size = (size_t)select_npoints * type_info->src_type_size; - - /* Currently the full overwrite status of a chunk is only obtained on a per-process - * basis. This means that if the total selection in the chunk, as determined by the combination - * of selections of all of the processes interested in the chunk, covers the entire chunk, - * the performance optimization of not reading the chunk from the file is still valid, but - * is not applied in the current implementation. Something like an appropriately placed - * MPI_Allreduce or a running total of the number of chunk points selected during chunk - * redistribution should suffice for implementing this case - JTH. + /* Initialize rank-local chunk info */ + local_info_array[i].chunk_info = chunk_info; + local_info_array[i].chunk_buf_size = 0; + local_info_array[i].num_writers = 0; + local_info_array[i].orig_owner = mpi_rank; + local_info_array[i].new_owner = mpi_rank; + local_info_array[i].buf = NULL; + + select_npoints = H5S_GET_SELECT_NPOINTS(chunk_info->fspace); + local_info_array[i].io_size = (size_t)select_npoints * type_info->dst_type_size; + + /* + * Determine whether this chunk will need to be read from the file. If this is + * a read operation, the chunk will be read. If this is a write operation, we + * generally need to read a filtered chunk from the file before modifying it, + * unless the chunk is being fully overwritten. + * + * TODO: Currently the full overwrite status of a chunk is only obtained on a + * per-rank basis. This means that if the total selection in the chunk, as + * determined by the combination of selections of all of the ranks interested in + * the chunk, covers the entire chunk, the performance optimization of not reading + * the chunk from the file is still valid, but is not applied in the current + * implementation. + * + * To implement this case, a few approaches were considered: + * + * - Keep a running total (distributed to each rank) of the number of chunk + * elements selected during chunk redistribution and compare that to the total + * number of elements in the chunk once redistribution is finished + * + * - Process all incoming chunk messages before doing I/O (these are currently + * processed AFTER doing I/O), combine the owning rank's selection in a chunk + * with the selections received from other ranks and check to see whether that + * combined selection covers the entire chunk + * + * The first approach will be dangerous if the application performs an overlapping + * write to a chunk, as the number of selected elements can equal or exceed the + * number of elements in the chunk without the whole chunk selection being covered. + * While it might be considered erroneous for an application to do an overlapping + * write, we don't explicitly disallow it. + * + * The second approach contains a bit of complexity in that part of the chunk + * messages will be needed before doing I/O and part will be needed after doing I/O. + * Since modification data from chunk messages can't be applied until after any I/O + * is performed (otherwise, we'll overwrite any applied modification data), chunk + * messages are currently entirely processed after I/O. However, in order to determine + * if a chunk is being fully overwritten, we need the dataspace portion of the chunk + * messages before doing I/O. The naive way to do this is to process chunk messages + * twice, using just the relevant information from the message before and after I/O. + * The better way would be to avoid processing chunk messages twice by extracting (and + * keeping around) the dataspace portion of the message before I/O and processing the + * rest of the chunk message after I/O. Note that the dataspace portion of each chunk + * message is used to correctly apply chunk modification data from the message, so + * must be kept around both before and after I/O in this case. */ - if ((chunk_npoints = H5S_GET_EXTENT_NPOINTS(chunk_info->fspace)) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") - local_info_array[i].full_overwrite = - (local_info_array[i].io_size >= (hsize_t)chunk_npoints * type_info->dst_type_size) ? TRUE - : FALSE; + if (io_info->op_type == H5D_IO_OP_READ) + local_info_array[i].need_read = TRUE; + else { + local_info_array[i].need_read = + local_info_array[i].io_size < (size_t)io_info->dset->shared->layout.u.chunk.size; + } + + local_info_array[i].skip_filter_pline = FALSE; + if (!filter_partial_edge_chunks) { + /* + * If this is a partial edge chunk and the "don't filter partial edge + * chunks" flag is set, make sure not to apply filters to the chunk. + */ + if (H5D__chunk_is_partial_edge_chunk(io_info->dset->shared->ndims, + io_info->dset->shared->layout.u.chunk.dim, + chunk_info->scaled, io_info->dset->shared->curr_dims)) + local_info_array[i].skip_filter_pline = TRUE; + } + + /* Initialize the chunk's shared info */ + local_info_array[i].chunk_current = udata.chunk_block; + local_info_array[i].chunk_new = udata.chunk_block; + + /* + * Check if the list is not in ascending order of offset in the file + * or has unallocated chunks. In either case, the list should get + * sorted. + */ + if (i) { + haddr_t curr_chunk_offset = local_info_array[i].chunk_current.offset; + haddr_t prev_chunk_offset = local_info_array[i - 1].chunk_current.offset; + + if (!H5F_addr_defined(prev_chunk_offset) || !H5F_addr_defined(curr_chunk_offset) || + (curr_chunk_offset < prev_chunk_offset)) + need_sort = TRUE; + } + + /* + * Extensible arrays may calculate a chunk's index a little differently + * than normal when the dataset's unlimited dimension is not the + * slowest-changing dimension, so set the index here based on what the + * extensible array code calculated instead of what was calculated + * in the chunk file mapping. + */ + if (io_info->dset->shared->layout.u.chunk.idx_type == H5D_CHUNK_IDX_EARRAY) + local_info_array[i].index_info.chunk_idx = udata.chunk_idx; + else + local_info_array[i].index_info.chunk_idx = chunk_info->index; + + local_info_array[i].index_info.filter_mask = udata.filter_mask; + local_info_array[i].index_info.need_insert = FALSE; chunk_node = H5SL_next(chunk_node); - } /* end for */ - } /* end if */ + } - /* Redistribute shared chunks to new owners as necessary */ - if (io_info->op_type == H5D_IO_OP_WRITE) -#if MPI_VERSION >= 3 - if (H5D__chunk_redistribute_shared_chunks(io_info, type_info, fm, local_info_array, - &num_chunks_selected) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks") -#else - HGOTO_ERROR( - H5E_DATASET, H5E_WRITEERROR, FAIL, - "unable to redistribute shared chunks - MPI version < 3 (MPI_Mprobe and MPI_Imrecv missing)") + /* Ensure the chunk list is sorted in ascending order of offset in the file */ + if (need_sort) + HDqsort(local_info_array, num_chunks_selected, sizeof(H5D_filtered_collective_io_info_t), + H5D__cmp_filtered_collective_io_info_entry); + +#ifdef H5Dmpio_DEBUG + H5D__mpio_dump_collective_filtered_chunk_list(local_info_array, num_chunks_selected, mpi_rank); #endif + } + else if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) { + hsize_t scaled[H5O_LAYOUT_NDIMS] = {0}; + + /* + * If this rank has no selection in the dataset and collective + * metadata reads are enabled, do a fake lookup of a chunk to + * ensure that this rank has the chunk index opened. Otherwise, + * only the ranks that had a selection will have opened the + * chunk index and they will have done so independently. Therefore, + * when ranks with no selection participate in later collective + * metadata reads, they will try to open the chunk index collectively + * and issues will occur since other ranks won't participate. + * + * In the future, we should consider having a chunk index "open" + * callback that can be used to ensure collectivity between ranks + * in a more natural way, but this hack should suffice for now. + */ + if (H5D__chunk_lookup(io_info->dset, scaled, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + } *chunk_list = local_info_array; *num_entries = num_chunks_selected; done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__construct_filtered_io_info_list() */ +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif -#if MPI_VERSION >= 3 + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_collective_filtered_chunk_io_setup() */ /*------------------------------------------------------------------------- - * Function: H5D__chunk_redistribute_shared_chunks - * - * Purpose: When performing a collective write on a Dataset with - * filters applied, this function is used to redistribute any - * chunks which are selected by more than one process, so as - * to preserve file integrity after the write by ensuring - * that any shared chunks are only modified by one process. - * - * The current implementation follows this 3-phase process: - * - * - Collect everyone's list of chunks into one large list, - * sort the list in increasing order of chunk offset in the - * file and hand the list off to rank 0 - * - * - Rank 0 scans the list looking for matching runs of chunk - * offset in the file (corresponding to a shared chunk which - * has been selected by more than one rank in the I/O - * operation) and for each shared chunk, it redistributes - * the chunk to the process writing to the chunk which - * currently has the least amount of chunks assigned to it - * by modifying the "new_owner" field in each of the list - * entries corresponding to that chunk - * - * - After the chunks have been redistributed, rank 0 re-sorts - * the list in order of previous owner so that each rank - * will get back exactly the array that they contributed to - * the redistribution operation, with the "new_owner" field - * of each chunk they are modifying having possibly been - * modified. Rank 0 then scatters each segment of the list - * back to its corresponding rank + * Function: H5D__mpio_redistribute_shared_chunks + * + * Purpose: When performing a parallel write on a chunked Dataset with + * filters applied, we must ensure that any particular chunk + * is only written to by a single MPI rank in order to avoid + * potential data races on the chunk. This function is used to + * redistribute (by assigning ownership to a single rank) any + * chunks which are selected by more than one MPI rank. + * + * An initial Allgather is performed to determine how many + * chunks each rank has selected in the write operation and + * then that number is compared against a threshold value to + * determine whether chunk redistribution should be done on + * MPI rank 0 only, or on all MPI ranks. * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Monday, May 1, 2017 + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, const H5D_io_info_t *io_info, + const H5D_chunk_map_t *fm, int mpi_rank, int mpi_size, + size_t **rank_chunks_assigned_map) +{ + hbool_t redistribute_on_all_ranks; + size_t *num_chunks_map = NULL; + size_t coll_chunk_list_size = 0; + size_t i; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(chunk_list || 0 == chunk_list_num_entries); + HDassert(io_info); + HDassert(fm); + HDassert(mpi_size > 1); /* No chunk sharing is possible for MPI Comm size of 1 */ + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Redistribute shared chunks"); +#endif + + /* + * Allocate an array for each rank to keep track of the number of + * chunks assigned to any other rank in order to cut down on future + * MPI communication. + */ + if (NULL == (num_chunks_map = H5MM_malloc((size_t)mpi_size * sizeof(*num_chunks_map)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate assigned chunks array") + + /* Perform initial Allgather to determine the collective chunk list size */ + if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&chunk_list_num_entries, 1, H5_SIZE_T_AS_MPI_TYPE, + num_chunks_map, 1, H5_SIZE_T_AS_MPI_TYPE, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code) + + for (i = 0; i < (size_t)mpi_size; i++) + coll_chunk_list_size += num_chunks_map[i]; + + /* + * Determine whether we should perform chunk redistribution on all + * ranks or just rank 0. For a relatively small number of chunks, + * we redistribute on all ranks to cut down on MPI communication + * overhead. For a larger number of chunks, we redistribute on + * rank 0 only to cut down on memory usage. + */ + redistribute_on_all_ranks = coll_chunk_list_size < H5D_CHUNK_REDISTRIBUTE_THRES; + + if (H5D__mpio_redistribute_shared_chunks_int(chunk_list, num_chunks_map, redistribute_on_all_ranks, + io_info, fm, mpi_rank, mpi_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTREDISTRIBUTE, FAIL, "can't redistribute shared chunks") + + /* + * If the caller provided a pointer for the mapping from + * rank value -> number of chunks assigned, return that + * mapping here. + */ + if (rank_chunks_assigned_map) { + /* + * If we performed chunk redistribution on rank 0 only, distribute + * the rank value -> number of chunks assigned mapping back to all + * ranks. + */ + if (!redistribute_on_all_ranks) { + if (MPI_SUCCESS != + (mpi_code = MPI_Bcast(num_chunks_map, mpi_size, H5_SIZE_T_AS_MPI_TYPE, 0, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "couldn't broadcast chunk mapping to other ranks", mpi_code) + } + + *rank_chunks_assigned_map = num_chunks_map; + } + +done: + if (!rank_chunks_assigned_map || (ret_value < 0)) { + num_chunks_map = H5MM_xfree(num_chunks_map); + } + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_redistribute_shared_chunks() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_redistribute_shared_chunks_int + * + * Purpose: Routine to perform redistribution of shared chunks during + * parallel writes to datasets with filters applied. + * + * If `all_ranks_involved` is TRUE, chunk redistribution + * occurs on all MPI ranks. This is usually done when there + * is a relatively small number of chunks involved in order to + * cut down on MPI communication overhead while increasing + * total memory usage a bit. + * + * If `all_ranks_involved` is FALSE, only rank 0 will perform + * chunk redistribution. This is usually done when there is + * a relatively large number of chunks involved in order to + * cut down on total memory usage at the cost of increased + * overhead from MPI communication. + * + * This implementation is as follows: + * + * - All MPI ranks send their list of selected chunks to the + * ranks involved in chunk redistribution. Then, the + * involved ranks sort this new list in order of chunk + * index. + * + * - The involved ranks scan the list looking for matching + * runs of chunk index values (corresponding to a shared + * chunk which has been selected by more than one rank in + * the I/O operation) and for each shared chunk, + * redistribute the chunk to the MPI rank writing to the + * chunk which currently has the least amount of chunks + * assigned to it. This is done by modifying the "new_owner" + * field in each of the list entries corresponding to that + * chunk. The involved ranks then re-sort the list in order + * of original chunk owner so that each rank's section of + * contributed chunks is contiguous in the collective chunk + * list. + * + * - If chunk redistribution occurred on all ranks, each rank + * scans through the collective chunk list to find their + * contributed section of chunks and uses that to update + * their local chunk list with the newly-updated "new_owner" + * and "num_writers" fields. If chunk redistribution + * occurred only on rank 0, an MPI_Scatterv operation will + * be used to scatter the segments of the collective chunk + * list from rank 0 back to the corresponding ranks. + * + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ static herr_t -H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - const H5D_chunk_map_t * fm, - H5D_filtered_collective_io_info_t *local_chunk_array, - size_t * local_chunk_array_num_entries) +H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list, + size_t *num_chunks_assigned_map, hbool_t all_ranks_involved, + const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm, + int mpi_rank, int mpi_size) { - H5D_filtered_collective_io_info_t *shared_chunks_info_array = - NULL; /* The list of all chunks selected in the operation by all processes */ - H5S_sel_iter_t *mem_iter = NULL; /* Memory iterator for H5D__gather_mem */ - unsigned char **mod_data = - NULL; /* Array of chunk modification data buffers sent by a process to new chunk owners */ - MPI_Request *send_requests = NULL; /* Array of MPI_Isend chunk modification data send requests */ - MPI_Status * send_statuses = NULL; /* Array of MPI_Isend chunk modification send statuses */ - hbool_t mem_iter_init = FALSE; - size_t shared_chunks_info_array_num_entries = 0; - size_t num_send_requests = 0; - size_t * num_assigned_chunks_array = NULL; - size_t i, last_assigned_idx; - int * send_counts = NULL; - int * send_displacements = NULL; - int scatter_recvcount_int; - int mpi_rank, mpi_size, mpi_code; + MPI_Datatype struct_type; + MPI_Datatype packed_type; + hbool_t struct_type_derived = FALSE; + hbool_t packed_type_derived = FALSE; + size_t i; + size_t coll_chunk_list_num_entries = 0; + void * coll_chunk_list = NULL; + int * counts_disps_array = NULL; + int * counts_ptr = NULL; + int * displacements_ptr = NULL; + int num_chunks_int; + int mpi_code; herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC + HDassert(num_chunks_assigned_map); + HDassert(chunk_list || 0 == num_chunks_assigned_map[mpi_rank]); HDassert(io_info); - HDassert(type_info); HDassert(fm); - HDassert(local_chunk_array_num_entries); + HDassert(mpi_size > 1); - if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank") - if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0) - HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size") +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Redistribute shared chunks (internal)"); +#endif - /* Set to latest format for encoding dataspace */ - H5CX_set_libver_bounds(NULL); + /* + * Make sure it's safe to cast this rank's number + * of chunks to be sent into an int for MPI + */ + H5_CHECKED_ASSIGN(num_chunks_int, int, num_chunks_assigned_map[mpi_rank], size_t); - if (*local_chunk_array_num_entries) - if (NULL == (send_requests = - (MPI_Request *)H5MM_malloc(*local_chunk_array_num_entries * sizeof(MPI_Request)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send requests buffer") + /* + * Phase 1 - Participate in collective gathering of every rank's + * list of chunks to the ranks which are performing the redistribution + * operation. + */ - if (NULL == (mem_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator") + if (all_ranks_involved || (mpi_rank == 0)) { + /* + * Allocate array to store the receive counts of each rank, as well as + * the displacements into the final array where each rank will place + * their data. The first half of the array contains the receive counts + * (in rank order), while the latter half contains the displacements + * (also in rank order). + */ + if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) { + /* Push an error, but still participate in collective gather operation */ + HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate receive counts and displacements array") + } + else { + /* Set the receive counts from the assigned chunks map */ + counts_ptr = counts_disps_array; + + for (i = 0; i < (size_t)mpi_size; i++) + H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t); + + /* Set the displacements into the receive buffer for the gather operation */ + displacements_ptr = &counts_disps_array[mpi_size]; + + *displacements_ptr = 0; + for (i = 1; i < (size_t)mpi_size; i++) + displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1]; + } + } - /* Gather every rank's list of chunks to rank 0 to allow it to perform the redistribution operation. After - * this call, the gathered list will initially be sorted in increasing order of chunk offset in the file. + /* + * Construct MPI derived types for extracting information + * necessary for MPI communication */ - if (H5D__mpio_array_gatherv(local_chunk_array, *local_chunk_array_num_entries, - sizeof(H5D_filtered_collective_io_info_t), (void **)&shared_chunks_info_array, - &shared_chunks_info_array_num_entries, false, 0, io_info->comm, - H5D__cmp_filtered_collective_io_info_entry) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather array") + if (H5D__mpio_get_chunk_redistribute_info_types(&packed_type, &packed_type_derived, &struct_type, + &struct_type_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't create derived datatypes for chunk redistribution info") + + /* Perform gather operation */ + if (H5_mpio_gatherv_alloc(chunk_list, num_chunks_int, struct_type, counts_ptr, displacements_ptr, + packed_type, all_ranks_involved, 0, io_info->comm, mpi_rank, mpi_size, + &coll_chunk_list, &coll_chunk_list_num_entries) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, + "can't gather chunk redistribution info to involved ranks") - /* Rank 0 redistributes any shared chunks to new owners as necessary */ - if (mpi_rank == 0) { - if (NULL == (send_counts = (int *)H5MM_calloc((size_t)mpi_size * sizeof(int)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate send counts buffer") + /* + * If all ranks are redistributing shared chunks, we no + * longer need the receive counts and displacements array + */ + if (all_ranks_involved) { + counts_disps_array = H5MM_xfree(counts_disps_array); + } - if (NULL == (send_displacements = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate send displacements buffer") + /* + * Phase 2 - Involved ranks now redistribute any shared chunks to new + * owners as necessary. + */ - if (NULL == (num_assigned_chunks_array = (size_t *)H5MM_calloc((size_t)mpi_size * sizeof(size_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, - "unable to allocate number of assigned chunks array") + if (all_ranks_involved || (mpi_rank == 0)) { + H5D_chunk_redistribute_info_t *chunk_entry; + hsize_t curr_chunk_idx; + size_t set_begin_index; + int num_writers; + int new_chunk_owner; - for (i = 0; i < shared_chunks_info_array_num_entries;) { - H5D_filtered_collective_io_info_t *chunk_entry; - haddr_t last_seen_addr = shared_chunks_info_array[i].chunk_states.chunk_current.offset; - size_t set_begin_index = i; - size_t num_writers = 0; - int new_chunk_owner = shared_chunks_info_array[i].owners.original_owner; + /* Clear the mapping from rank value -> number of assigned chunks */ + HDmemset(num_chunks_assigned_map, 0, (size_t)mpi_size * sizeof(*num_chunks_assigned_map)); - /* Process each set of duplicate entries caused by another process writing to the same chunk */ - do { - chunk_entry = &shared_chunks_info_array[i]; + /* Sort collective chunk list according to chunk index */ + HDqsort(coll_chunk_list, coll_chunk_list_num_entries, sizeof(H5D_chunk_redistribute_info_t), + H5D__cmp_chunk_redistribute_info); - send_counts[chunk_entry->owners.original_owner] += (int)sizeof(*chunk_entry); + /* + * Process all chunks in the collective chunk list. + * Note that the loop counter is incremented by both + * the outer loop (while processing each entry in + * the collective chunk list) and the inner loop + * (while processing duplicate entries for shared + * chunks). + */ + chunk_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[0]; + for (i = 0; i < coll_chunk_list_num_entries;) { + /* Set chunk's initial new owner to its original owner */ + new_chunk_owner = chunk_entry->orig_owner; - /* The new owner of the chunk is determined by the process + /* + * Set the current chunk index so we know when we've processed + * all duplicate entries for a particular shared chunk + */ + curr_chunk_idx = chunk_entry->chunk_idx; + + /* Reset the initial number of writers to this chunk */ + num_writers = 0; + + /* Set index for the beginning of this section of duplicate chunk entries */ + set_begin_index = i; + + /* + * Process each chunk entry in the set for the current + * (possibly shared) chunk and increment the loop counter + * while doing so. + */ + do { + /* + * The new owner of the chunk is determined by the rank * writing to the chunk which currently has the least amount * of chunks assigned to it */ - if (num_assigned_chunks_array[chunk_entry->owners.original_owner] < - num_assigned_chunks_array[new_chunk_owner]) - new_chunk_owner = chunk_entry->owners.original_owner; + if (num_chunks_assigned_map[chunk_entry->orig_owner] < + num_chunks_assigned_map[new_chunk_owner]) + new_chunk_owner = chunk_entry->orig_owner; + /* Update the number of writers to this particular chunk */ num_writers++; - } while (++i < shared_chunks_info_array_num_entries && - shared_chunks_info_array[i].chunk_states.chunk_current.offset == last_seen_addr); - /* Set all of the chunk entries' "new_owner" fields */ + chunk_entry++; + } while (++i < coll_chunk_list_num_entries && chunk_entry->chunk_idx == curr_chunk_idx); + + /* We should never have more writers to a chunk than the number of MPI ranks */ + HDassert(num_writers <= mpi_size); + + /* Set all processed chunk entries' "new_owner" and "num_writers" fields */ for (; set_begin_index < i; set_begin_index++) { - shared_chunks_info_array[set_begin_index].owners.new_owner = new_chunk_owner; - shared_chunks_info_array[set_begin_index].num_writers = num_writers; - } /* end for */ + H5D_chunk_redistribute_info_t *entry; - num_assigned_chunks_array[new_chunk_owner]++; - } /* end for */ + entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[set_begin_index]; + + entry->new_owner = new_chunk_owner; + entry->num_writers = num_writers; + } - /* Sort the new list in order of previous owner so that each original owner of a chunk - * entry gets that entry back, with the possibly newly-modified "new_owner" field + /* Update the number of chunks assigned to the MPI rank that now owns this chunk */ + num_chunks_assigned_map[new_chunk_owner]++; + } + + /* + * Re-sort the collective chunk list in order of original chunk owner + * so that each rank's section of contributed chunks is contiguous in + * the collective chunk list. + * + * NOTE: this re-sort is frail in that it needs to sort the collective + * chunk list so that each rank's section of contributed chunks + * is in the exact order it was contributed in, or things will + * be scrambled when each rank's local chunk list is updated. + * Therefore, the sorting algorithm here is tied to the one + * used during the I/O setup operation. Specifically, chunks + * are first sorted by ascending order of offset in the file and + * then by chunk index. In the future, a better redistribution + * algorithm may be devised that doesn't rely on frail sorting, + * but the current implementation is a quick and naive approach. */ - if (shared_chunks_info_array_num_entries > 1) - HDqsort(shared_chunks_info_array, shared_chunks_info_array_num_entries, - sizeof(H5D_filtered_collective_io_info_t), - H5D__cmp_filtered_collective_io_info_entry_owner); - - send_displacements[0] = 0; - for (i = 1; i < (size_t)mpi_size; i++) - send_displacements[i] = send_displacements[i - 1] + send_counts[i - 1]; - } /* end if */ + HDqsort(coll_chunk_list, coll_chunk_list_num_entries, sizeof(H5D_chunk_redistribute_info_t), + H5D__cmp_chunk_redistribute_info_orig_owner); + } - /* Scatter the segments of the list back to each process */ - H5_CHECKED_ASSIGN(scatter_recvcount_int, int, - *local_chunk_array_num_entries * sizeof(H5D_filtered_collective_io_info_t), size_t); - if (MPI_SUCCESS != - (mpi_code = MPI_Scatterv(shared_chunks_info_array, send_counts, send_displacements, MPI_BYTE, - local_chunk_array, scatter_recvcount_int, MPI_BYTE, 0, io_info->comm))) - HMPI_GOTO_ERROR(FAIL, "unable to scatter shared chunks info buffer", mpi_code) + if (all_ranks_involved) { + /* + * If redistribution occurred on all ranks, search for the section + * in the collective chunk list corresponding to this rank's locally + * selected chunks and update the local list after redistribution. + */ + for (i = 0; i < coll_chunk_list_num_entries; i++) + if (mpi_rank == ((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i].orig_owner) + break; - if (shared_chunks_info_array) { - H5MM_free(shared_chunks_info_array); - shared_chunks_info_array = NULL; - } /* end if */ + for (size_t j = 0; j < (size_t)num_chunks_int; j++) { + H5D_chunk_redistribute_info_t *coll_entry; + + coll_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i++]; + + chunk_list[j].new_owner = coll_entry->new_owner; + chunk_list[j].num_writers = coll_entry->num_writers; + } + } + else { + /* + * If redistribution occurred only on rank 0, scatter the segments + * of the collective chunk list back to each rank so that their + * local chunk lists get updated + */ + if (MPI_SUCCESS != + (mpi_code = MPI_Scatterv(coll_chunk_list, counts_ptr, displacements_ptr, packed_type, chunk_list, + num_chunks_int, struct_type, 0, io_info->comm))) + HMPI_GOTO_ERROR(FAIL, "unable to scatter shared chunks info buffer", mpi_code) + } + +#ifdef H5Dmpio_DEBUG + H5D__mpio_dump_collective_filtered_chunk_list(chunk_list, num_chunks_assigned_map[mpi_rank], mpi_rank); +#endif + +done: + H5MM_free(coll_chunk_list); + + if (struct_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (packed_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&packed_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + + H5MM_free(counts_disps_array); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_redistribute_shared_chunks_int() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_share_chunk_modification_data + * + * Purpose: When performing a parallel write on a chunked dataset with + * filters applied, we must first ensure that any particular + * chunk is only written to by a single MPI rank in order to + * avoid potential data races on the chunk. Once dataset + * chunks have been redistributed in a suitable manner, each + * MPI rank must send its chunk data to other ranks for each + * chunk it no longer owns. + * + * The current implementation here follows the Nonblocking + * Consensus algorithm described in: + * http://unixer.de/publications/img/hoefler-dsde-protocols.pdf + * + * First, each MPI rank scans through its list of selected + * chunks and does the following for each chunk: + * + * * If a chunk in the MPI rank's chunk list is still owned + * by that rank, the rank checks how many messages are + * incoming for that chunk and adds that to its running + * total. Then, the rank updates its local chunk list so + * that any previous chunk entries for chunks that are no + * longer owned by the rank get overwritten by chunk + * entries for chunks the rank still owns. Since the data + * for the chunks no longer owned will have already been + * sent, those chunks can effectively be discarded. + * * If a chunk in the MPI rank's chunk list is no longer + * owned by that rank, the rank sends the data it wishes to + * update the chunk with to the MPI rank that now has + * ownership of that chunk. To do this, it encodes the + * chunk's index, its selection in the chunk and its + * modification data into a buffer and then posts a + * non-blocking MPI_Issend to the owning rank. + * + * Once this step is complete, all MPI ranks allocate arrays + * to hold chunk message receive buffers and MPI request + * objects for each non-blocking receive they will post for + * incoming chunk modification messages. Then, all MPI ranks + * enter a loop that alternates between non-blocking + * MPI_Iprobe calls to probe for incoming messages and + * MPI_Testall calls to see if all send requests have + * completed. As chunk modification messages arrive, + * non-blocking MPI_Irecv calls will be posted for each + * message. + * + * Once all send requests have completed, an MPI_Ibarrier is + * posted and the loop then alternates between MPI_Iprobe + * calls and MPI_Test calls to check if all ranks have reached + * the non-blocking barrier. Once all ranks have reached the + * barrier, processing can move on to updating the selected + * chunks that are owned in the operation. + * + * Any chunk messages that were received from other ranks + * will be returned through the `chunk_msg_bufs` array and + * `chunk_msg_bufs_len` will be set appropriately. + * + * NOTE: The use of non-blocking sends and receives of chunk + * data here may contribute to large amounts of memory + * usage/MPI request overhead if the number of shared + * chunks is high. If this becomes a problem, it may be + * useful to split the message receiving loop away so + * that chunk modification messages can be received and + * processed immediately (MPI_Recv) using a single chunk + * message buffer. However, it's possible this may + * degrade performance since the chunk message sends + * are synchronous (MPI_Issend) in the Nonblocking + * Consensus algorithm. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list, + size_t *chunk_list_num_entries, H5D_io_info_t *io_info, + const H5D_type_info_t *type_info, int mpi_rank, int mpi_size, + H5D_filtered_collective_io_info_t **chunk_hash_table, + unsigned char ***chunk_msg_bufs, int *chunk_msg_bufs_len) +{ +#if MPI_VERSION >= 3 + H5D_filtered_collective_io_info_t *chunk_table = NULL; + H5S_sel_iter_t * mem_iter = NULL; + unsigned char ** msg_send_bufs = NULL; + unsigned char ** msg_recv_bufs = NULL; + MPI_Request * send_requests = NULL; + MPI_Request * recv_requests = NULL; + MPI_Request ibarrier = MPI_REQUEST_NULL; + hbool_t mem_iter_init = FALSE; + hbool_t ibarrier_posted = FALSE; + size_t send_bufs_nalloc = 0; + size_t num_send_requests = 0; + size_t num_recv_requests = 0; + size_t num_msgs_incoming = 0; + size_t last_assigned_idx; + size_t i; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC - /* Now that the chunks have been redistributed, each process must send its modification data - * to the new owners of any of the chunks it previously possessed. Accordingly, each process - * must also issue asynchronous receives for any messages it may receive for each of the - * chunks it is assigned, in order to avoid potential deadlocking issues. + HDassert(chunk_list_num_entries); + HDassert(chunk_list || 0 == *chunk_list_num_entries); + HDassert(io_info); + HDassert(type_info); + HDassert(mpi_size > 1); + HDassert(chunk_msg_bufs); + HDassert(chunk_msg_bufs_len); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Share chunk modification data"); +#endif + + /* Set to latest format for encoding dataspace */ + H5CX_set_libver_bounds(NULL); + + if (*chunk_list_num_entries) { + /* Allocate a selection iterator for iterating over chunk dataspaces */ + if (NULL == (mem_iter = H5FL_MALLOC(H5S_sel_iter_t))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate dataspace selection iterator") + + /* + * Allocate send buffer and MPI_Request arrays for non-blocking + * sends of outgoing chunk messages + */ + send_bufs_nalloc = H5D_CHUNK_NUM_SEND_MSGS_INIT; + if (NULL == (msg_send_bufs = H5MM_malloc(send_bufs_nalloc * sizeof(*msg_send_bufs)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't allocate chunk modification message buffer array") + + if (NULL == (send_requests = H5MM_malloc(send_bufs_nalloc * sizeof(*send_requests)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send requests array") + } + + /* + * For each chunk this rank owns, add to the total number of + * incoming MPI messages, then update the local chunk list to + * overwrite any previous chunks no longer owned by this rank. + * Since the data for those chunks will have already been sent, + * this rank should no longer be interested in them and they + * can effectively be discarded. This bookkeeping also makes + * the code for the collective file space re-allocation and + * chunk re-insertion operations a bit simpler. + * + * For each chunk this rank doesn't own, use non-blocking + * synchronous sends to send the data this rank is writing to + * the rank that does own the chunk. */ - if (*local_chunk_array_num_entries) - if (NULL == (mod_data = (unsigned char **)H5MM_malloc(*local_chunk_array_num_entries * - sizeof(unsigned char *)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate modification data buffer array") - - /* Perform all the sends on the chunks that this rank doesn't own */ - /* (Sends and recvs must be two separate loops, to avoid deadlock) */ - for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) { - H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i]; - - if (mpi_rank != chunk_entry->owners.new_owner) { - H5D_chunk_info_t *chunk_info = NULL; + for (i = 0, last_assigned_idx = 0; i < *chunk_list_num_entries; i++) { + H5D_filtered_collective_io_info_t *chunk_entry = &chunk_list[i]; + + if (mpi_rank == chunk_entry->new_owner) { + num_msgs_incoming += (size_t)(chunk_entry->num_writers - 1); + + /* + * Overwrite chunk entries this rank doesn't own with entries that it + * does own, since it has sent the necessary data and is no longer + * interested in the chunks it doesn't own. + */ + chunk_list[last_assigned_idx] = chunk_list[i]; + + /* + * Since, at large scale, a chunk's index value may be larger than + * the maximum value that can be stored in an int, we cannot rely + * on using a chunk's index value as the tag for the MPI messages + * sent/received for a chunk. Therefore, add this chunk to a hash + * table with the chunk's index as a key so that we can quickly find + * the chunk when processing chunk messages that were received. The + * message itself will contain the chunk's index so we can update + * the correct chunk with the received data. + */ + HASH_ADD(hh, chunk_table, index_info.chunk_idx, sizeof(hsize_t), &chunk_list[last_assigned_idx]); + + last_assigned_idx++; + } + else { + H5D_chunk_info_t *chunk_info = chunk_entry->chunk_info; unsigned char * mod_data_p = NULL; hsize_t iter_nelmts; - size_t mod_data_size; + size_t mod_data_size = 0; + size_t space_size = 0; - /* Look up the chunk and get its file and memory dataspaces */ - if (NULL == (chunk_info = (H5D_chunk_info_t *)H5SL_search(fm->sel_chunks, &chunk_entry->index))) - HGOTO_ERROR(H5E_DATASPACE, H5E_NOTFOUND, FAIL, "can't locate chunk in skip list") + /* Add the size of the chunk index to the encoded size */ + mod_data_size += sizeof(hsize_t); - /* Determine size of serialized chunk file dataspace, plus the size of - * the data being written - */ - if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to get encoded dataspace size") + /* Determine size of serialized chunk file dataspace */ + if (H5S_encode(chunk_info->fspace, &mod_data_p, &space_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to get encoded dataspace size") + mod_data_size += space_size; + /* Determine size of data being written */ iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace); - H5_CHECK_OVERFLOW(iter_nelmts, hsize_t, size_t); + mod_data_size += (size_t)iter_nelmts * type_info->src_type_size; - if (NULL == (mod_data[num_send_requests] = (unsigned char *)H5MM_malloc(mod_data_size))) + if (NULL == (msg_send_bufs[num_send_requests] = H5MM_malloc(mod_data_size))) HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, - "couldn't allocate chunk modification send buffer") + "couldn't allocate chunk modification message buffer") + + mod_data_p = msg_send_bufs[num_send_requests]; + + /* Store the chunk's index into the buffer */ + HDmemcpy(mod_data_p, &chunk_entry->index_info.chunk_idx, sizeof(hsize_t)); + mod_data_p += sizeof(hsize_t); /* Serialize the chunk's file dataspace into the buffer */ - mod_data_p = mod_data[num_send_requests]; if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace") /* Initialize iterator for memory selection */ - if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0) + if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, + H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") mem_iter_init = TRUE; @@ -3009,466 +3943,2057 @@ H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_ty if (0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, mod_data_p)) HGOTO_ERROR(H5E_IO, H5E_CANTGATHER, FAIL, "couldn't gather from write buffer") - /* Send modification data to new owner */ + /* + * Ensure that the size of the chunk data being sent can be + * safely cast to an int for MPI. Note that this should + * generally be OK for now (unless a rank is sending a + * whole 32-bit-sized chunk of data + its encoded selection), + * but if we allow larger than 32-bit-sized chunks in the + * future, this may become a problem and derived datatypes + * will need to be used. + */ H5_CHECK_OVERFLOW(mod_data_size, size_t, int) - H5_CHECK_OVERFLOW(chunk_entry->index, hsize_t, int) + + /* Send modification data to new owner */ if (MPI_SUCCESS != - (mpi_code = MPI_Isend(mod_data[num_send_requests], (int)mod_data_size, MPI_BYTE, - chunk_entry->owners.new_owner, (int)chunk_entry->index, io_info->comm, - &send_requests[num_send_requests]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Isend failed", mpi_code) + (mpi_code = MPI_Issend(msg_send_bufs[num_send_requests], (int)mod_data_size, MPI_BYTE, + chunk_entry->new_owner, H5D_CHUNK_MOD_DATA_TAG, io_info->comm, + &send_requests[num_send_requests]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Issend failed", mpi_code) + + num_send_requests++; + + /* Resize send buffer and send request arrays if necessary */ + if (num_send_requests == send_bufs_nalloc) { + void *tmp_alloc; + + send_bufs_nalloc = (size_t)((double)send_bufs_nalloc * 1.5); + + if (NULL == + (tmp_alloc = H5MM_realloc(msg_send_bufs, send_bufs_nalloc * sizeof(*msg_send_bufs)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't resize chunk modification message buffer array") + msg_send_bufs = tmp_alloc; + + if (NULL == + (tmp_alloc = H5MM_realloc(send_requests, send_bufs_nalloc * sizeof(*send_requests)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't resize send requests array") + send_requests = tmp_alloc; + } - if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release memory selection iterator") mem_iter_init = FALSE; + } + } - num_send_requests++; - } /* end if */ - } /* end for */ + /* Check if the number of send or receive requests will overflow an int (MPI requirement) */ + if (num_send_requests > INT_MAX || num_msgs_incoming > INT_MAX) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, + "too many shared chunks in parallel filtered write operation") - /* Perform all the recvs on the chunks this rank owns */ - for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) { - H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i]; + H5_CHECK_OVERFLOW(num_send_requests, size_t, int) + H5_CHECK_OVERFLOW(num_msgs_incoming, size_t, int) - if (mpi_rank == chunk_entry->owners.new_owner) { - /* Allocate all necessary buffers for an asynchronous receive operation */ - if (chunk_entry->num_writers > 1) { - MPI_Message message; - MPI_Status status; - size_t j; + /* + * Allocate receive buffer and MPI_Request arrays for non-blocking + * receives of incoming chunk messages + */ + if (num_msgs_incoming) { + if (NULL == (msg_recv_bufs = H5MM_malloc(num_msgs_incoming * sizeof(*msg_recv_bufs)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't allocate chunk modification message buffer array") - chunk_entry->async_info.num_receive_requests = (int)chunk_entry->num_writers - 1; - if (NULL == (chunk_entry->async_info.receive_requests_array = (MPI_Request *)H5MM_malloc( - (size_t)chunk_entry->async_info.num_receive_requests * sizeof(MPI_Request)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate async requests array") + if (NULL == (recv_requests = H5MM_malloc(num_msgs_incoming * sizeof(*recv_requests)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive requests array") + } - if (NULL == - (chunk_entry->async_info.receive_buffer_array = (unsigned char **)H5MM_malloc( - (size_t)chunk_entry->async_info.num_receive_requests * sizeof(unsigned char *)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate async receive buffers") + /* Process any incoming messages until everyone is done */ + do { + MPI_Status status; + int msg_flag; - for (j = 0; j < chunk_entry->num_writers - 1; j++) { - int count = 0; + /* Probe for an incoming message from any rank */ + if (MPI_SUCCESS != (mpi_code = MPI_Iprobe(MPI_ANY_SOURCE, H5D_CHUNK_MOD_DATA_TAG, io_info->comm, + &msg_flag, &status))) + HMPI_GOTO_ERROR(FAIL, "MPI_Iprobe failed", mpi_code) - /* Probe for a particular message from any process, removing that message - * from the receive queue in the process and allocating that much memory - * for the asynchronous receive - */ - if (MPI_SUCCESS != (mpi_code = MPI_Mprobe(MPI_ANY_SOURCE, (int)chunk_entry->index, - io_info->comm, &message, &status))) - HMPI_GOTO_ERROR(FAIL, "MPI_Mprobe failed", mpi_code) - - if (MPI_SUCCESS != (mpi_code = MPI_Get_count(&status, MPI_BYTE, &count))) - HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code) - - HDassert(count >= 0); - if (NULL == (chunk_entry->async_info.receive_buffer_array[j] = - (unsigned char *)H5MM_malloc((size_t)count * sizeof(char *)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, - "unable to allocate modification data receive buffer") - - if (MPI_SUCCESS != (mpi_code = MPI_Imrecv( - chunk_entry->async_info.receive_buffer_array[j], count, MPI_BYTE, - &message, &chunk_entry->async_info.receive_requests_array[j]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Imrecv failed", mpi_code) - } /* end for */ - } /* end if */ - - local_chunk_array[last_assigned_idx++] = local_chunk_array[i]; - } /* end else */ - } /* end for */ + /* + * If a message was found, allocate a buffer for the message and + * post a non-blocking receive to receive it + */ + if (msg_flag) { +#if MPI_VERSION >= 3 + MPI_Count msg_size = 0; - *local_chunk_array_num_entries = last_assigned_idx; + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&status, MPI_BYTE, &msg_size))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements_x failed", mpi_code) - /* Wait for all async send requests to complete before returning */ - if (num_send_requests) { - if (NULL == (send_statuses = (MPI_Status *)H5MM_malloc(num_send_requests * sizeof(MPI_Status)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send statuses buffer") + H5_CHECK_OVERFLOW(msg_size, MPI_Count, int) +#else + int msg_size = 0; - H5_CHECK_OVERFLOW(num_send_requests, size_t, int); - if (MPI_SUCCESS != (mpi_code = MPI_Waitall((int)num_send_requests, send_requests, send_statuses))) - HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code) - } /* end if */ + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&status, MPI_BYTE, &msg_size))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) +#endif -done: - /* Now that all async send requests have completed, free up the send - * buffers used in the async operations + if (msg_size <= 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid chunk modification message size") + + HDassert((num_recv_requests + 1) <= num_msgs_incoming); + if (NULL == + (msg_recv_bufs[num_recv_requests] = H5MM_malloc((size_t)msg_size * sizeof(unsigned char)))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, + "couldn't allocate chunk modification message receive buffer") + + if (MPI_SUCCESS != (mpi_code = MPI_Irecv(msg_recv_bufs[num_recv_requests], (int)msg_size, + MPI_BYTE, status.MPI_SOURCE, H5D_CHUNK_MOD_DATA_TAG, + io_info->comm, &recv_requests[num_recv_requests]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Irecv failed", mpi_code) + + num_recv_requests++; + } + + if (ibarrier_posted) { + int ibarrier_completed; + + if (MPI_SUCCESS != (mpi_code = MPI_Test(&ibarrier, &ibarrier_completed, MPI_STATUS_IGNORE))) + HMPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code) + + if (ibarrier_completed) + break; + } + else { + int all_sends_completed; + + /* Determine if all send requests have completed */ + if (MPI_SUCCESS != (mpi_code = MPI_Testall((int)num_send_requests, send_requests, + &all_sends_completed, MPI_STATUSES_IGNORE))) + HMPI_GOTO_ERROR(FAIL, "MPI_Testall failed", mpi_code) + + if (all_sends_completed) { + /* Post non-blocking barrier */ + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(io_info->comm, &ibarrier))) + HMPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code) + ibarrier_posted = TRUE; + + /* + * Now that all send requests have completed, free up the + * send buffers used in the non-blocking operations + */ + if (msg_send_bufs) { + for (i = 0; i < num_send_requests; i++) { + if (msg_send_bufs[i]) + H5MM_free(msg_send_bufs[i]); + } + + msg_send_bufs = H5MM_xfree(msg_send_bufs); + } + } + } + } while (1); + + /* + * Ensure all receive requests have completed before moving on. + * For linked-chunk I/O, more overlap with computation could + * theoretically be achieved by returning the receive requests + * array and postponing this wait until during chunk updating + * when the data is really needed. However, multi-chunk I/O + * only updates a chunk at a time and the messages may not come + * in the order that chunks are processed. So, the safest way to + * support both I/O modes is to simply make sure all messages + * are available. */ - for (i = 0; i < num_send_requests; i++) { - if (mod_data[i]) - H5MM_free(mod_data[i]); - } /* end for */ + if (MPI_SUCCESS != (mpi_code = MPI_Waitall((int)num_recv_requests, recv_requests, MPI_STATUSES_IGNORE))) + HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code) + + /* Set the new number of locally-selected chunks */ + *chunk_list_num_entries = last_assigned_idx; + + /* Return chunk message buffers if any were received */ + *chunk_hash_table = chunk_table; + *chunk_msg_bufs = msg_recv_bufs; + *chunk_msg_bufs_len = (int)num_recv_requests; +done: + if (ret_value < 0) { + /* If this rank failed, make sure to participate in collective barrier */ + if (!ibarrier_posted) { + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(io_info->comm, &ibarrier))) + HMPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code) + } + + if (num_send_requests) { + for (i = 0; i < num_send_requests; i++) { + MPI_Cancel(&send_requests[i]); + } + } + + if (recv_requests) { + for (i = 0; i < num_recv_requests; i++) { + MPI_Cancel(&recv_requests[i]); + } + } + + if (msg_recv_bufs) { + for (i = 0; i < num_recv_requests; i++) { + H5MM_free(msg_recv_bufs[i]); + } + + H5MM_free(msg_recv_bufs); + } + + HASH_CLEAR(hh, chunk_table); + } + + if (recv_requests) + H5MM_free(recv_requests); if (send_requests) H5MM_free(send_requests); - if (send_statuses) - H5MM_free(send_statuses); - if (send_counts) - H5MM_free(send_counts); - if (send_displacements) - H5MM_free(send_displacements); - if (mod_data) - H5MM_free(mod_data); - if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) - HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - if (mem_iter) - H5MM_free(mem_iter); - if (num_assigned_chunks_array) - H5MM_free(num_assigned_chunks_array); - if (shared_chunks_info_array) - H5MM_free(shared_chunks_info_array); + + if (msg_send_bufs) { + for (i = 0; i < num_send_requests; i++) { + if (msg_send_bufs[i]) + H5MM_free(msg_send_bufs[i]); + } + + H5MM_free(msg_send_bufs); + } + + if (mem_iter) { + if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release dataspace selection iterator") + mem_iter = H5FL_FREE(H5S_sel_iter_t, mem_iter); + } + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__chunk_redistribute_shared_chunks() */ +#else + FUNC_ENTER_STATIC + HERROR( + H5E_DATASET, H5E_WRITEERROR, + "unable to send chunk modification data between MPI ranks - MPI version < 3 (MPI_Ibarrier missing)") + FUNC_LEAVE_NOAPI(FAIL) #endif +} /* end H5D__mpio_share_chunk_modification_data() */ /*------------------------------------------------------------------------- - * Function: H5D__mpio_filtered_collective_write_type + * Function: H5D__mpio_collective_filtered_chunk_common_io * - * Purpose: Constructs a MPI derived datatype for both the memory and - * the file for a collective write of filtered chunks. The - * datatype contains the offsets in the file and the locations - * of the filtered chunk data buffers. + * Purpose: This routine performs the common part of collective I/O + * when reading or writing filtered chunks collectively. * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Tuesday, November 22, 2016 - * *------------------------------------------------------------------------- */ static herr_t -H5D__mpio_filtered_collective_write_type(H5D_filtered_collective_io_info_t *chunk_list, size_t num_entries, - MPI_Datatype *new_mem_type, hbool_t *mem_type_derived, - MPI_Datatype *new_file_type, hbool_t *file_type_derived) +H5D__mpio_collective_filtered_chunk_common_io(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, const H5D_io_info_t *io_info, + const H5D_type_info_t *type_info, int mpi_size) { - MPI_Aint *write_buf_array = NULL; /* Relative displacements of filtered chunk data buffers */ - MPI_Aint *file_offset_array = NULL; /* Chunk offsets in the file */ - int * length_array = NULL; /* Filtered Chunk lengths */ - herr_t ret_value = SUCCEED; + H5D_io_info_t coll_io_info; + H5D_storage_t ctg_store; + MPI_Datatype file_type = MPI_DATATYPE_NULL; + MPI_Datatype mem_type = MPI_DATATYPE_NULL; + hbool_t mem_type_is_derived = FALSE; + hbool_t file_type_is_derived = FALSE; + hsize_t mpi_buf_count; + haddr_t base_read_offset = HADDR_UNDEF; + size_t num_chunks; + size_t i; + char fake_buf; /* Used as a fake buffer for ranks with no chunks, thus a NULL buf pointer */ + int mpi_code; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC - HDassert(chunk_list); - HDassert(new_mem_type); - HDassert(mem_type_derived); - HDassert(new_file_type); - HDassert(file_type_derived); + HDassert(chunk_list || 0 == chunk_list_num_entries); + HDassert(io_info); + HDassert(type_info); - if (num_entries > 0) { - size_t i; - int mpi_code; - void * base_buf; - - H5_CHECK_OVERFLOW(num_entries, size_t, int); - - /* Allocate arrays */ - if (NULL == (length_array = (int *)H5MM_malloc((size_t)num_entries * sizeof(int)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, - "memory allocation failed for filtered collective write length array") - if (NULL == (write_buf_array = (MPI_Aint *)H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, - "memory allocation failed for filtered collective write buf length array") - if (NULL == (file_offset_array = (MPI_Aint *)H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, - "memory allocation failed for collective write offset array") - - /* Ensure the list is sorted in ascending order of offset in the file */ - HDqsort(chunk_list, num_entries, sizeof(H5D_filtered_collective_io_info_t), - H5D__cmp_filtered_collective_io_info_entry); + /* Initialize temporary I/O info */ + coll_io_info = *io_info; - base_buf = chunk_list[0].buf; - for (i = 0; i < num_entries; i++) { - /* Set up the offset in the file, the length of the chunk data, and the relative - * displacement of the chunk data write buffer - */ - file_offset_array[i] = (MPI_Aint)chunk_list[i].chunk_states.new_chunk.offset; - length_array[i] = (int)chunk_list[i].chunk_states.new_chunk.length; - write_buf_array[i] = (MPI_Aint)chunk_list[i].buf - (MPI_Aint)base_buf; - } /* end for */ + /* + * Construct MPI derived datatype for collective I/O on chunks + */ + if (H5D__mpio_collective_filtered_io_type(chunk_list, chunk_list_num_entries, io_info->op_type, &mem_type, + &mem_type_is_derived, &file_type, &file_type_is_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_BADTYPE, FAIL, "couldn't create MPI I/O type for chunk I/O") - /* Create memory MPI type */ - if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_entries, length_array, - write_buf_array, MPI_BYTE, new_mem_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - *mem_type_derived = TRUE; - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_mem_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - - /* Create file MPI type */ - if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_entries, length_array, - file_offset_array, MPI_BYTE, new_file_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - *file_type_derived = TRUE; - if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - } /* end if */ + /* + * For reads, determine how many chunks are actually being read. + * Note that if this is a read during a write operation + * (read chunk -> unfilter -> modify -> write back), some + * chunks may not need to be read if they're being fully + * overwritten during a write operation. + */ + if (io_info->op_type == H5D_IO_OP_READ) { + for (i = 0, num_chunks = 0; i < chunk_list_num_entries; i++) { + HDassert(chunk_list[i].buf); + + if (chunk_list[i].need_read) { + if (!H5F_addr_defined(base_read_offset)) + base_read_offset = chunk_list[i].chunk_current.offset; + + num_chunks++; + } + } + } + else + num_chunks = chunk_list_num_entries; + + /* + * If this rank doesn't have a selection, it can + * skip I/O if independent I/O was requested at + * the low level, or if the MPI communicator size + * is 1. + * + * Otherwise, this rank has to participate in + * collective I/O, but probably has a NULL buf + * pointer, so override to a fake buffer since our + * write/read function expects one. + */ + if (num_chunks == 0) { + H5FD_mpio_collective_opt_t coll_opt_mode; + + /* Get the collective_opt property to check whether the application wants to do IO individually. */ + if (H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_opt property") + + if ((mpi_size == 1) || (H5FD_MPIO_INDIVIDUAL_IO == coll_opt_mode)) { + HGOTO_DONE(SUCCEED) + } + else { + if (io_info->op_type == H5D_IO_OP_WRITE) + coll_io_info.u.wbuf = &fake_buf; + else + coll_io_info.u.rbuf = &fake_buf; + } + } + + /* + * Setup for I/O operation + */ + + mpi_buf_count = (num_chunks) ? 1 : 0; + + if (num_chunks) { + /* + * Setup the base storage address for this operation + * to be the first chunk's file address + */ + if (io_info->op_type == H5D_IO_OP_WRITE) + ctg_store.contig.dset_addr = chunk_list[0].chunk_new.offset; + else + ctg_store.contig.dset_addr = base_read_offset; + } + else + ctg_store.contig.dset_addr = 0; + + ctg_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size; + coll_io_info.store = &ctg_store; + + /* Perform I/O */ + if (H5D__final_collective_io(&coll_io_info, type_info, mpi_buf_count, file_type, mem_type) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish MPI I/O") done: - if (write_buf_array) - H5MM_free(write_buf_array); - if (file_offset_array) - H5MM_free(file_offset_array); - if (length_array) - H5MM_free(length_array); + /* Free the MPI buf and file types, if they were derived */ + if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__mpio_filtered_collective_write_type() */ +} /* end H5D__mpio_collective_filtered_chunk_common_io() */ /*------------------------------------------------------------------------- - * Function: H5D__filtered_collective_chunk_entry_io + * Function: H5D__mpio_collective_filtered_chunk_read * - * Purpose: Given an entry for a filtered chunk, performs the necessary - * steps for updating the chunk data during a collective - * write, or for reading the chunk from file during a - * collective read. + * Purpose: This routine coordinates a collective read across all ranks + * of the chunks they have selected. Each rank will then go + * and * * Return: Non-negative on success/Negative on failure * - * Programmer: Jordan Henderson - * Wednesday, January 18, 2017 - * *------------------------------------------------------------------------- */ static herr_t -H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk_entry, - const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - const H5D_chunk_map_t *fm) +H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, const H5D_io_info_t *io_info, + const H5D_type_info_t *type_info, int mpi_rank, int mpi_size) { - H5D_chunk_info_t *chunk_info = NULL; - H5S_sel_iter_t * mem_iter = NULL; /* Memory iterator for H5D__scatter_mem/H5D__gather_mem */ - H5S_sel_iter_t * file_iter = NULL; - H5Z_EDC_t err_detect; /* Error detection info */ - H5Z_cb_t filter_cb; /* I/O filter callback function */ - unsigned filter_mask = 0; - hsize_t iter_nelmts; /* Number of points to iterate over for the chunk IO operation */ - hssize_t extent_npoints; - hsize_t true_chunk_size; - hbool_t mem_iter_init = FALSE; - hbool_t file_iter_init = FALSE; - size_t buf_size; - size_t i; - H5S_t * dataspace = NULL; /* Other process' dataspace for the chunk */ - void * tmp_gath_buf = NULL; /* Temporary gather buffer to gather into from application buffer - before scattering out to the chunk data buffer (when writing data), - or vice versa (when reading data) */ - int mpi_code; - herr_t ret_value = SUCCEED; + H5D_fill_buf_info_t fb_info; + H5D_chunk_info_t * chunk_info = NULL; + H5D_io_info_t coll_io_info; + H5Z_EDC_t err_detect; /* Error detection info */ + H5Z_cb_t filter_cb; /* I/O filter callback function */ + hsize_t file_chunk_size = 0; + hsize_t iter_nelmts; /* Number of points to iterate over for the chunk IO operation */ + hbool_t should_fill = FALSE; + hbool_t fb_info_init = FALSE; + hbool_t index_empty = FALSE; + size_t i; + H5S_t * fill_space = NULL; + void * base_read_buf = NULL; + herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC - HDassert(chunk_entry); + HDassert(chunk_list || 0 == chunk_list_num_entries); HDassert(io_info); HDassert(type_info); - HDassert(fm); - /* Retrieve filter settings from API context */ - if (H5CX_get_err_detect(&err_detect) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info") - if (H5CX_get_filter_cb(&filter_cb) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function") +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Filtered collective chunk read"); +#else + (void)mpi_rank; +#endif - /* Look up the chunk and get its file and memory dataspaces */ - if (NULL == (chunk_info = (H5D_chunk_info_t *)H5SL_search(fm->sel_chunks, &chunk_entry->index))) - HGOTO_ERROR(H5E_DATASPACE, H5E_NOTFOUND, FAIL, "can't locate chunk in skip list") + /* Initialize temporary I/O info */ + coll_io_info = *io_info; + coll_io_info.u.rbuf = NULL; - if ((extent_npoints = H5S_GET_EXTENT_NPOINTS(chunk_info->fspace)) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid") - true_chunk_size = (hsize_t)extent_npoints * type_info->src_type_size; + if (chunk_list_num_entries) { + /* Retrieve filter settings from API context */ + if (H5CX_get_err_detect(&err_detect) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info") + if (H5CX_get_filter_cb(&filter_cb) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function") - /* If the size of the filtered chunk is larger than the number of points in the - * chunk file space extent times the datatype size, allocate enough space to hold the - * whole filtered chunk. Otherwise, allocate a buffer equal to the size of the - * chunk so that the unfiltering operation doesn't have to grow the buffer. + /* Set size of full chunks in dataset */ + file_chunk_size = io_info->dset->shared->layout.u.chunk.size; + + /* Determine if fill values should be "read" for unallocated chunks */ + should_fill = (io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) || + ((io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) && + io_info->dset->shared->dcpl_cache.fill.fill_defined); + } + + /* + * Allocate memory buffers for all chunks being read. Chunk data buffers are of + * the largest size between the chunk's current filtered size and the chunk's true + * size, as calculated by the number of elements in the chunk's file space extent + * multiplied by the datatype size. This tries to ensure that: + * + * * If we're reading the chunk and the filter normally reduces the chunk size, + * the unfiltering operation won't need to grow the buffer. + * * If we're reading the chunk and the filter normally grows the chunk size, + * we make sure to read into a buffer of size equal to the filtered chunk's + * size; reading into a (smaller) buffer of size equal to the unfiltered + * chunk size would of course be bad. */ - buf_size = MAX(chunk_entry->chunk_states.chunk_current.length, true_chunk_size); + for (i = 0; i < chunk_list_num_entries; i++) { + HDassert(chunk_list[i].need_read); - if (NULL == (chunk_entry->buf = H5MM_malloc(buf_size))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer") + chunk_list[i].chunk_buf_size = MAX(chunk_list[i].chunk_current.length, file_chunk_size); + + if (NULL == (chunk_list[i].buf = H5MM_malloc(chunk_list[i].chunk_buf_size))) { + /* Push an error, but participate in collective read */ + HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer") + break; + } - /* If this is not a full chunk overwrite or this is a read operation, the chunk must be - * read from the file and unfiltered. + /* + * Check if chunk is currently allocated. If not, don't try to + * read it from the file. Instead, just fill the chunk buffer + * with the fill value if necessary. + */ + if (H5F_addr_defined(chunk_list[i].chunk_current.offset)) { + /* Set first read buffer */ + if (!base_read_buf) + base_read_buf = chunk_list[i].buf; + + /* Set chunk's new length for eventual filter pipeline calls */ + if (chunk_list[i].skip_filter_pline) + chunk_list[i].chunk_new.length = file_chunk_size; + else + chunk_list[i].chunk_new.length = chunk_list[i].chunk_current.length; + } + else { + chunk_list[i].need_read = FALSE; + + /* Set chunk's new length for eventual filter pipeline calls */ + chunk_list[i].chunk_new.length = file_chunk_size; + + if (should_fill) { + /* Initialize fill value buffer if not already initialized */ + if (!fb_info_init) { + hsize_t chunk_dims[H5S_MAX_RANK]; + + HDassert(io_info->dset->shared->ndims == io_info->dset->shared->layout.u.chunk.ndims - 1); + for (size_t j = 0; j < io_info->dset->shared->layout.u.chunk.ndims - 1; j++) + chunk_dims[j] = (hsize_t)io_info->dset->shared->layout.u.chunk.dim[j]; + + /* Get a dataspace for filling chunk memory buffers */ + if (NULL == (fill_space = H5S_create_simple( + io_info->dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create chunk fill dataspace") + + /* Initialize fill value buffer */ + if (H5D__fill_init(&fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc, + (void *)&io_info->dset->shared->dcpl_cache.pline, + (H5MM_free_t)H5D__chunk_mem_free, + (void *)&io_info->dset->shared->dcpl_cache.pline, + &io_info->dset->shared->dcpl_cache.fill, io_info->dset->shared->type, + io_info->dset->shared->type_id, 0, file_chunk_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer") + + fb_info_init = TRUE; + } + + /* Write fill value to memory buffer */ + HDassert(fb_info.fill_buf); + if (H5D__fill(fb_info.fill_buf, io_info->dset->shared->type, chunk_list[i].buf, + type_info->mem_type, fill_space) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't fill chunk buffer with fill value") + } + } + } + + /* + * If dataset is incrementally allocated and hasn't been written to + * yet, the chunk index should be empty. In this case, a collective + * read of chunks is essentially a no-op, so avoid it here. */ - if (!chunk_entry->full_overwrite || io_info->op_type == H5D_IO_OP_READ) { - H5FD_mpio_xfer_t xfer_mode; /* Parallel transfer for this request */ + index_empty = FALSE; + if (io_info->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR) + if (H5D__chunk_index_empty(io_info->dset, &index_empty) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty") + + if (!index_empty) { + /* + * Override the read buffer to point to the address of + * the first chunk data buffer being read into + */ + if (base_read_buf) + coll_io_info.u.rbuf = base_read_buf; - chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length; + /* Perform collective chunk read */ + if (H5D__mpio_collective_filtered_chunk_common_io(chunk_list, chunk_list_num_entries, &coll_io_info, + type_info, mpi_size) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish collective filtered chunk read") + } - /* Currently, these chunk reads are done independently and will likely - * cause issues with collective metadata reads enabled. In the future, - * this should be refactored to use collective chunk reads - JTH */ + /* + * Iterate through all the read chunks, unfiltering them and scattering their + * data out to the application's read buffer. + */ + for (i = 0; i < chunk_list_num_entries; i++) { + chunk_info = chunk_list[i].chunk_info; + + /* Unfilter the chunk, unless we didn't read it from the file */ + if (chunk_list[i].need_read && !chunk_list[i].skip_filter_pline) { + if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE, + &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb, + (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size, + &chunk_list[i].buf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying") + } - /* Get the original state of parallel I/O transfer mode */ - if (H5CX_get_io_xfer_mode(&xfer_mode) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + /* Scatter the chunk data to the read buffer */ + iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace); - /* Change the xfer_mode to independent for handling the I/O */ - if (H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") + if (H5D_select_io_mem(io_info->u.rbuf, chunk_info->mspace, chunk_list[i].buf, chunk_info->fspace, + type_info->src_type_size, (size_t)iter_nelmts) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't copy chunk data to read buffer") + } - if (H5F_shared_block_read(io_info->f_sh, H5FD_MEM_DRAW, - chunk_entry->chunk_states.chunk_current.offset, - chunk_entry->chunk_states.new_chunk.length, chunk_entry->buf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "unable to read raw data chunk") +done: + /* Free all resources used by entries in the chunk list */ + for (i = 0; i < chunk_list_num_entries; i++) { + if (chunk_list[i].buf) { + H5MM_free(chunk_list[i].buf); + chunk_list[i].buf = NULL; + } + } - /* Return to the original I/O transfer mode setting */ - if (H5CX_set_io_xfer_mode(xfer_mode) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") + /* Release the fill buffer info, if it's been initialized */ + if (fb_info_init && H5D__fill_term(&fb_info) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info") + if (fill_space && (H5S_close(fill_space) < 0)) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space") - if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE, &filter_mask, err_detect, - filter_cb, (size_t *)&chunk_entry->chunk_states.new_chunk.length, &buf_size, - &chunk_entry->buf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying") - } /* end if */ - else { - chunk_entry->chunk_states.new_chunk.length = true_chunk_size; - } /* end else */ +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif - /* Initialize iterator for memory selection */ - if (NULL == (mem_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator") + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_collective_filtered_chunk_read() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_collective_filtered_chunk_update + * + * Purpose: When performing a parallel write on a chunked dataset with + * filters applied, all ranks must update their owned chunks + * with their own modification data and data from other ranks. + * This routine is responsible for coordinating that process. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, + H5D_filtered_collective_io_info_t *chunk_hash_table, + unsigned char **chunk_msg_bufs, int chunk_msg_bufs_len, + const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, + int mpi_rank, int mpi_size) +{ + H5D_fill_buf_info_t fb_info; + H5D_chunk_info_t * chunk_info = NULL; + H5S_sel_iter_t * sel_iter = NULL; /* Dataspace selection iterator for H5D__scatter_mem */ + H5D_io_info_t coll_io_info; + H5Z_EDC_t err_detect; /* Error detection info */ + H5Z_cb_t filter_cb; /* I/O filter callback function */ + hsize_t file_chunk_size = 0; + hsize_t iter_nelmts; /* Number of points to iterate over for the chunk IO operation */ + hbool_t should_fill = FALSE; + hbool_t fb_info_init = FALSE; + hbool_t sel_iter_init = FALSE; + hbool_t index_empty = FALSE; + size_t i; + H5S_t * dataspace = NULL; + H5S_t * fill_space = NULL; + void * base_read_buf = NULL; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC - if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") - mem_iter_init = TRUE; + HDassert(chunk_list || 0 == chunk_list_num_entries); + HDassert((chunk_msg_bufs && chunk_hash_table) || 0 == chunk_msg_bufs_len); + HDassert(io_info); + HDassert(type_info); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Filtered collective chunk update"); +#endif - /* If this is a read operation, scatter the read chunk data to the user's buffer. + if (chunk_list_num_entries) { + /* Retrieve filter settings from API context */ + if (H5CX_get_err_detect(&err_detect) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info") + if (H5CX_get_filter_cb(&filter_cb) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function") + + /* Set size of full chunks in dataset */ + file_chunk_size = io_info->dset->shared->layout.u.chunk.size; + + /* Determine if fill values should be written to chunks */ + should_fill = (io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) || + ((io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) && + io_info->dset->shared->dcpl_cache.fill.fill_defined); + } + + /* + * Allocate memory buffers for all owned chunks. Chunk data buffers are of the + * largest size between the chunk's current filtered size and the chunk's true + * size, as calculated by the number of elements in the chunk's file space extent + * multiplied by the datatype size. This tries to ensure that: * - * If this is a write operation, update the chunk data buffer with the modifications - * from the current process, then apply any modifications from other processes. Finally, - * filter the newly-updated chunk. + * * If we're fully overwriting the chunk and the filter normally reduces the + * chunk size, we simply have the exact buffer size required to hold the + * unfiltered chunk data. + * * If we're fully overwriting the chunk and the filter normally grows the + * chunk size (e.g., fletcher32 filter), the final filtering operation + * (hopefully) won't need to grow the buffer. + * * If we're reading the chunk and the filter normally reduces the chunk size, + * the unfiltering operation won't need to grow the buffer. + * * If we're reading the chunk and the filter normally grows the chunk size, + * we make sure to read into a buffer of size equal to the filtered chunk's + * size; reading into a (smaller) buffer of size equal to the unfiltered + * chunk size would of course be bad. */ - switch (io_info->op_type) { - case H5D_IO_OP_READ: - if (NULL == (file_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t)))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file iterator") + for (i = 0; i < chunk_list_num_entries; i++) { + HDassert(mpi_rank == chunk_list[i].new_owner); - if (H5S_select_iter_init(file_iter, chunk_info->fspace, type_info->src_type_size, 0) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, - "unable to initialize memory selection information") - file_iter_init = TRUE; + chunk_list[i].chunk_buf_size = MAX(chunk_list[i].chunk_current.length, file_chunk_size); + + /* + * If this chunk hasn't been allocated yet and we aren't writing + * out fill values to it, make sure to 0-fill its memory buffer + * so we don't use uninitialized memory. + */ + if (!H5F_addr_defined(chunk_list[i].chunk_current.offset) && !should_fill) + chunk_list[i].buf = H5MM_calloc(chunk_list[i].chunk_buf_size); + else + chunk_list[i].buf = H5MM_malloc(chunk_list[i].chunk_buf_size); - iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace); + if (NULL == chunk_list[i].buf) { + /* Push an error, but participate in collective read */ + HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer") + break; + } - if (NULL == (tmp_gath_buf = H5MM_malloc(iter_nelmts * type_info->src_type_size))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer") + /* Set chunk's new length for eventual filter pipeline calls */ + if (chunk_list[i].need_read) { + /* + * Check if chunk is currently allocated. If not, don't try to + * read it from the file. Instead, just fill the chunk buffer + * with the fill value if fill values are to be written. + */ + if (H5F_addr_defined(chunk_list[i].chunk_current.offset)) { + /* Set first read buffer */ + if (!base_read_buf) + base_read_buf = chunk_list[i].buf; + + /* Set chunk's new length for eventual filter pipeline calls */ + if (chunk_list[i].skip_filter_pline) + chunk_list[i].chunk_new.length = file_chunk_size; + else + chunk_list[i].chunk_new.length = chunk_list[i].chunk_current.length; + } + else { + chunk_list[i].need_read = FALSE; + + /* Set chunk's new length for eventual filter pipeline calls */ + chunk_list[i].chunk_new.length = file_chunk_size; + + if (should_fill) { + /* Initialize fill value buffer if not already initialized */ + if (!fb_info_init) { + hsize_t chunk_dims[H5S_MAX_RANK]; + + HDassert(io_info->dset->shared->ndims == + io_info->dset->shared->layout.u.chunk.ndims - 1); + for (size_t j = 0; j < io_info->dset->shared->layout.u.chunk.ndims - 1; j++) + chunk_dims[j] = (hsize_t)io_info->dset->shared->layout.u.chunk.dim[j]; + + /* Get a dataspace for filling chunk memory buffers */ + if (NULL == (fill_space = H5S_create_simple( + io_info->dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, + "unable to create chunk fill dataspace") + + /* Initialize fill value buffer */ + if (H5D__fill_init(&fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc, + (void *)&io_info->dset->shared->dcpl_cache.pline, + (H5MM_free_t)H5D__chunk_mem_free, + (void *)&io_info->dset->shared->dcpl_cache.pline, + &io_info->dset->shared->dcpl_cache.fill, + io_info->dset->shared->type, io_info->dset->shared->type_id, 0, + file_chunk_size) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer") + + fb_info_init = TRUE; + } + + /* Write fill value to memory buffer */ + HDassert(fb_info.fill_buf); + if (H5D__fill(fb_info.fill_buf, io_info->dset->shared->type, chunk_list[i].buf, + type_info->mem_type, fill_space) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, + "couldn't fill chunk buffer with fill value") + } + } + } + else + chunk_list[i].chunk_new.length = file_chunk_size; + } - if (!H5D__gather_mem(chunk_entry->buf, file_iter, (size_t)iter_nelmts, tmp_gath_buf)) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't gather from chunk buffer") + /* + * If dataset is incrementally allocated and hasn't been written to + * yet, the chunk index should be empty. In this case, a collective + * read of chunks is essentially a no-op, so avoid it here. + */ + index_empty = FALSE; + if (io_info->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR) + if (H5D__chunk_index_empty(io_info->dset, &index_empty) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty") - iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace); + if (!index_empty) { + /* + * Setup for I/O operation + */ - if (H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t)iter_nelmts, io_info->u.rbuf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer") + /* Initialize temporary I/O info */ + coll_io_info = *io_info; + coll_io_info.op_type = H5D_IO_OP_READ; - break; + /* Override the read buffer to point to the address of the first + * chunk data buffer being read into + */ + if (base_read_buf) + coll_io_info.u.rbuf = base_read_buf; - case H5D_IO_OP_WRITE: - iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace); + /* Read all chunks that need to be read from the file */ + if (H5D__mpio_collective_filtered_chunk_common_io(chunk_list, chunk_list_num_entries, &coll_io_info, + type_info, mpi_size) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish collective filtered chunk read") + } - if (NULL == (tmp_gath_buf = H5MM_malloc(iter_nelmts * type_info->src_type_size))) - HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer") + /* + * Now that all owned chunks have been read, update the chunks + * with modification data from the owning rank and other ranks. + */ - /* Gather modification data from the application write buffer into a temporary buffer */ - if (0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, tmp_gath_buf)) - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "couldn't gather from write buffer") + /* Process all chunks with data from the owning rank first */ + for (i = 0; i < chunk_list_num_entries; i++) { + HDassert(mpi_rank == chunk_list[i].new_owner); - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - mem_iter_init = FALSE; + chunk_info = chunk_list[i].chunk_info; - /* Initialize iterator for file selection */ - if (H5S_select_iter_init(mem_iter, chunk_info->fspace, type_info->dst_type_size, 0) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, - "unable to initialize file selection information") - mem_iter_init = TRUE; + /* + * If this chunk wasn't being fully overwritten, we read it from + * the file, so we need to unfilter it + */ + if (chunk_list[i].need_read && !chunk_list[i].skip_filter_pline) { + if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE, + &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb, + (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size, + &chunk_list[i].buf) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying") + } - iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace); + iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace); - /* Scatter the owner's modification data into the chunk data buffer according to - * the file space. - */ - if (H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to chunk data buffer") + if (H5D_select_io_mem(chunk_list[i].buf, chunk_info->fspace, io_info->u.wbuf, chunk_info->mspace, + type_info->dst_type_size, (size_t)iter_nelmts) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't copy chunk data to write buffer") + } - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - mem_iter_init = FALSE; + /* Allocate iterator for memory selection */ + if (NULL == (sel_iter = H5FL_MALLOC(H5S_sel_iter_t))) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator") - if (MPI_SUCCESS != - (mpi_code = MPI_Waitall(chunk_entry->async_info.num_receive_requests, - chunk_entry->async_info.receive_requests_array, MPI_STATUSES_IGNORE))) - HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code) + /* Now process all received chunk message buffers */ + for (i = 0; i < (size_t)chunk_msg_bufs_len; i++) { + H5D_filtered_collective_io_info_t *chunk_entry = NULL; + const unsigned char * msg_ptr = chunk_msg_bufs[i]; + hsize_t chunk_idx; - /* For each asynchronous receive call previously posted, receive the chunk modification - * buffer from another rank and update the chunk data - */ - for (i = 0; i < (size_t)chunk_entry->async_info.num_receive_requests; i++) { - const unsigned char *mod_data_p; + if (msg_ptr) { + /* Retrieve the chunk's index value */ + HDmemcpy(&chunk_idx, msg_ptr, sizeof(hsize_t)); + msg_ptr += sizeof(hsize_t); - /* Decode the process' chunk file dataspace */ - mod_data_p = chunk_entry->async_info.receive_buffer_array[i]; - if (NULL == (dataspace = H5S_decode(&mod_data_p))) + /* Find the chunk entry according to its chunk index */ + HASH_FIND(hh, chunk_hash_table, &chunk_idx, sizeof(hsize_t), chunk_entry); + HDassert(chunk_entry); + HDassert(mpi_rank == chunk_entry->new_owner); + + /* + * Only process the chunk if its data buffer is allocated. + * In the case of multi-chunk I/O, we're only working on + * a chunk at a time, so we need to skip over messages + * that aren't for the chunk we're currently working on. + */ + if (!chunk_entry->buf) + continue; + else { + /* Decode the chunk file dataspace from the message */ + if (NULL == (dataspace = H5S_decode(&msg_ptr))) HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace") - if (H5S_select_iter_init(mem_iter, dataspace, type_info->dst_type_size, 0) < 0) + if (H5S_select_iter_init(sel_iter, dataspace, type_info->dst_type_size, + H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information") - mem_iter_init = TRUE; + sel_iter_init = TRUE; iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace); /* Update the chunk data with the received modification data */ - if (H5D__scatter_mem(mod_data_p, mem_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0) + if (H5D__scatter_mem(msg_ptr, sel_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0) HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't scatter to write buffer") - if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0) + if (H5S_SELECT_ITER_RELEASE(sel_iter) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - mem_iter_init = FALSE; + sel_iter_init = FALSE; + if (dataspace) { if (H5S_close(dataspace) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace") dataspace = NULL; } - H5MM_free(chunk_entry->async_info.receive_buffer_array[i]); - } /* end for */ - /* Filter the chunk */ - if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, 0, &filter_mask, err_detect, filter_cb, - (size_t *)&chunk_entry->chunk_states.new_chunk.length, &buf_size, - &chunk_entry->buf) < 0) + H5MM_free(chunk_msg_bufs[i]); + chunk_msg_bufs[i] = NULL; + } + } + } + + /* Finally, filter all the chunks */ + for (i = 0; i < chunk_list_num_entries; i++) { + if (!chunk_list[i].skip_filter_pline) { + if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, 0, + &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb, + (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size, + &chunk_list[i].buf) < 0) HGOTO_ERROR(H5E_PLINE, H5E_CANTFILTER, FAIL, "output pipeline failed") + } #if H5_SIZEOF_SIZE_T > 4 - /* Check for the chunk expanding too much to encode in a 32-bit value */ - if (chunk_entry->chunk_states.new_chunk.length > ((size_t)0xffffffff)) - HGOTO_ERROR(H5E_DATASET, H5E_BADRANGE, FAIL, "chunk too large for 32-bit length") + /* Check for the chunk expanding too much to encode in a 32-bit value */ + if (chunk_list[i].chunk_new.length > ((size_t)0xffffffff)) + HGOTO_ERROR(H5E_DATASET, H5E_BADRANGE, FAIL, "chunk too large for 32-bit length") #endif - break; + } - default: - HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid I/O operation") - } /* end switch */ +done: + if (sel_iter) { + if (sel_iter_init && H5S_SELECT_ITER_RELEASE(sel_iter) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") + sel_iter = H5FL_FREE(H5S_sel_iter_t, sel_iter); + } + if (dataspace && (H5S_close(dataspace) < 0)) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace") + if (fill_space && (H5S_close(fill_space) < 0)) + HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space") + + /* Release the fill buffer info, if it's been initialized */ + if (fb_info_init && H5D__fill_term(&fb_info) < 0) + HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info") + + /* On failure, try to free all resources used by entries in the chunk list */ + if (ret_value < 0) { + for (i = 0; i < chunk_list_num_entries; i++) { + if (chunk_list[i].buf) { + H5MM_free(chunk_list[i].buf); + chunk_list[i].buf = NULL; + } + } + } + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_collective_filtered_chunk_update() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_collective_filtered_chunk_reallocate + * + * Purpose: When performing a parallel write on a chunked dataset with + * filters applied, all ranks must eventually get together and + * perform a collective reallocation of space in the file for + * all chunks that were modified on all ranks. This routine is + * responsible for coordinating that process. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, size_t *num_chunks_assigned_map, + H5D_io_info_t *io_info, H5D_chk_idx_info_t *idx_info, + int mpi_rank, int mpi_size) +{ + H5D_chunk_alloc_info_t *collective_list = NULL; + MPI_Datatype send_type; + MPI_Datatype recv_type; + hbool_t send_type_derived = FALSE; + hbool_t recv_type_derived = FALSE; + hbool_t need_sort = FALSE; + size_t collective_num_entries = 0; + size_t num_local_chunks_processed = 0; + size_t i; + void * gathered_array = NULL; + int * counts_disps_array = NULL; + int * counts_ptr = NULL; + int * displacements_ptr = NULL; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(chunk_list || 0 == chunk_list_num_entries); + HDassert(io_info); + HDassert(idx_info); + HDassert(idx_info->storage->idx_type != H5D_CHUNK_IDX_NONE); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Reallocation of chunk file space"); +#endif + + /* + * Make sure it's safe to cast this rank's number + * of chunks to be sent into an int for MPI + */ + H5_CHECK_OVERFLOW(chunk_list_num_entries, size_t, int); + + /* Create derived datatypes for the chunk file space info needed */ + if (H5D__mpio_get_chunk_alloc_info_types(&recv_type, &recv_type_derived, &send_type, &send_type_derived) < + 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't create derived datatypes for chunk file space info") + + /* + * Gather the new chunk sizes to all ranks for a collective reallocation + * of the chunks in the file. + */ + if (num_chunks_assigned_map) { + /* + * If a mapping between rank value -> number of assigned chunks has + * been provided (usually during linked-chunk I/O), we can use this + * to optimize MPI overhead a bit since MPI ranks won't need to + * first inform each other about how many chunks they're contributing. + */ + if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) { + /* Push an error, but still participate in collective gather operation */ + HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate receive counts and displacements array") + } + else { + /* Set the receive counts from the assigned chunks map */ + counts_ptr = counts_disps_array; + + for (i = 0; i < (size_t)mpi_size; i++) + H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t); + + /* Set the displacements into the receive buffer for the gather operation */ + displacements_ptr = &counts_disps_array[mpi_size]; + + *displacements_ptr = 0; + for (i = 1; i < (size_t)mpi_size; i++) + displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1]; + } + + /* Perform gather operation */ + if (H5_mpio_gatherv_alloc(chunk_list, (int)chunk_list_num_entries, send_type, counts_ptr, + displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size, + &gathered_array, &collective_num_entries) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "can't gather chunk file space info to/from ranks") + } + else { + /* + * If no mapping between rank value -> number of assigned chunks has + * been provided (usually during multi-chunk I/O), all MPI ranks will + * need to first inform other ranks about how many chunks they're + * contributing before performing the actual gather operation. Use + * the 'simple' MPI_Allgatherv wrapper for this. + */ + if (H5_mpio_gatherv_alloc_simple(chunk_list, (int)chunk_list_num_entries, send_type, recv_type, TRUE, + 0, io_info->comm, mpi_rank, mpi_size, &gathered_array, + &collective_num_entries) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "can't gather chunk file space info to/from ranks") + } + + /* Collectively re-allocate the modified chunks (from each rank) in the file */ + collective_list = (H5D_chunk_alloc_info_t *)gathered_array; + for (i = 0, num_local_chunks_processed = 0; i < collective_num_entries; i++) { + H5D_chunk_alloc_info_t *coll_entry = &collective_list[i]; + hbool_t need_insert; + hbool_t update_local_chunk; + + if (H5D__chunk_file_alloc(idx_info, &coll_entry->chunk_current, &coll_entry->chunk_new, &need_insert, + NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk") + + /* + * If we just re-allocated a chunk that is local to this + * rank, make sure to update the chunk entry in the local + * chunk list + */ + update_local_chunk = + (num_local_chunks_processed < chunk_list_num_entries) && + (coll_entry->chunk_idx == chunk_list[num_local_chunks_processed].index_info.chunk_idx); + + if (update_local_chunk) { + H5D_filtered_collective_io_info_t *local_chunk; + + local_chunk = &chunk_list[num_local_chunks_processed]; + + /* Sanity check that this chunk is actually local */ + HDassert(mpi_rank == local_chunk->orig_owner); + HDassert(mpi_rank == local_chunk->new_owner); + + local_chunk->chunk_new = coll_entry->chunk_new; + local_chunk->index_info.need_insert = need_insert; + + /* + * Since chunk reallocation can move chunks around, check if + * the local chunk list is still in ascending offset of order + * in the file + */ + if (num_local_chunks_processed) { + haddr_t curr_chunk_offset = local_chunk->chunk_new.offset; + haddr_t prev_chunk_offset = chunk_list[num_local_chunks_processed - 1].chunk_new.offset; + + HDassert(H5F_addr_defined(prev_chunk_offset) && H5F_addr_defined(curr_chunk_offset)); + if (curr_chunk_offset < prev_chunk_offset) + need_sort = TRUE; + } + + num_local_chunks_processed++; + } + } + + HDassert(chunk_list_num_entries == num_local_chunks_processed); + + /* + * Ensure this rank's local chunk list is sorted in + * ascending order of offset in the file + */ + if (need_sort) + HDqsort(chunk_list, chunk_list_num_entries, sizeof(H5D_filtered_collective_io_info_t), + H5D__cmp_filtered_collective_io_info_entry); + +done: + H5MM_free(gathered_array); + H5MM_free(counts_disps_array); + + if (send_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&send_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (recv_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&recv_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5D__mpio_collective_filtered_chunk_reallocate() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_collective_filtered_chunk_reinsert + * + * Purpose: When performing a parallel write on a chunked dataset with + * filters applied, all ranks must eventually get together and + * perform a collective reinsertion into the dataset's chunk + * index of chunks that were modified. This routine is + * responsible for coordinating that process. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, size_t *num_chunks_assigned_map, + H5D_io_info_t *io_info, H5D_chk_idx_info_t *idx_info, + int mpi_rank, int mpi_size) +{ + H5D_chunk_ud_t chunk_ud; + MPI_Datatype send_type; + MPI_Datatype recv_type; + hbool_t send_type_derived = FALSE; + hbool_t recv_type_derived = FALSE; + hsize_t scaled_coords[H5O_LAYOUT_NDIMS]; + size_t collective_num_entries = 0; + size_t i; + void * gathered_array = NULL; + int * counts_disps_array = NULL; + int * counts_ptr = NULL; + int * displacements_ptr = NULL; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(chunk_list || 0 == chunk_list_num_entries); + HDassert(io_info); + HDassert(idx_info); + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TRACE_ENTER(mpi_rank); + H5D_MPIO_TIME_START(mpi_rank, "Reinsertion of modified chunks into chunk index"); +#endif + + /* Only re-insert chunks if index has an insert method */ + if (!idx_info->storage->ops->insert) + HGOTO_DONE(SUCCEED); + + /* + * Make sure it's safe to cast this rank's number + * of chunks to be sent into an int for MPI + */ + H5_CHECK_OVERFLOW(chunk_list_num_entries, size_t, int); + + /* Create derived datatypes for the chunk re-insertion info needed */ + if (H5D__mpio_get_chunk_insert_info_types(&recv_type, &recv_type_derived, &send_type, + &send_type_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, + "can't create derived datatypes for chunk re-insertion info") + + /* + * Gather information to all ranks for a collective re-insertion + * of the modified chunks into the chunk index + */ + if (num_chunks_assigned_map) { + /* + * If a mapping between rank value -> number of assigned chunks has + * been provided (usually during linked-chunk I/O), we can use this + * to optimize MPI overhead a bit since MPI ranks won't need to + * first inform each other about how many chunks they're contributing. + */ + if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) { + /* Push an error, but still participate in collective gather operation */ + HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate receive counts and displacements array") + } + else { + /* Set the receive counts from the assigned chunks map */ + counts_ptr = counts_disps_array; + + for (i = 0; i < (size_t)mpi_size; i++) + H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t); + + /* Set the displacements into the receive buffer for the gather operation */ + displacements_ptr = &counts_disps_array[mpi_size]; + + *displacements_ptr = 0; + for (i = 1; i < (size_t)mpi_size; i++) + displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1]; + } + + /* Perform gather operation */ + if (H5_mpio_gatherv_alloc(chunk_list, (int)chunk_list_num_entries, send_type, counts_ptr, + displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size, + &gathered_array, &collective_num_entries) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, + "can't gather chunk index re-insertion info to/from ranks") + } + else { + /* + * If no mapping between rank value -> number of assigned chunks has + * been provided (usually during multi-chunk I/O), all MPI ranks will + * need to first inform other ranks about how many chunks they're + * contributing before performing the actual gather operation. Use + * the 'simple' MPI_Allgatherv wrapper for this. + */ + if (H5_mpio_gatherv_alloc_simple(chunk_list, (int)chunk_list_num_entries, send_type, recv_type, TRUE, + 0, io_info->comm, mpi_rank, mpi_size, &gathered_array, + &collective_num_entries) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, + "can't gather chunk index re-insertion info to/from ranks") + } + + /* Initialize static chunk udata fields from chunk index info */ + H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, idx_info); + + for (i = 0; i < collective_num_entries; i++) { + H5D_chunk_insert_info_t *coll_entry = &((H5D_chunk_insert_info_t *)gathered_array)[i]; + + /* + * We only need to reinsert this chunk if we had to actually + * allocate or reallocate space in the file for it + */ + if (!coll_entry->index_info.need_insert) + continue; + + chunk_ud.chunk_block = coll_entry->chunk_block; + chunk_ud.chunk_idx = coll_entry->index_info.chunk_idx; + chunk_ud.filter_mask = coll_entry->index_info.filter_mask; + chunk_ud.common.scaled = scaled_coords; + + /* Calculate scaled coordinates for the chunk */ + if (idx_info->layout->idx_type == H5D_CHUNK_IDX_EARRAY && idx_info->layout->u.earray.unlim_dim > 0) { + /* + * Extensible arrays where the unlimited dimension is not + * the slowest-changing dimension "swizzle" the coordinates + * to move the unlimited dimension value to offset 0. Therefore, + * we use the "swizzled" down chunks to calculate the "swizzled" + * scaled coordinates and then we undo the "swizzle" operation. + * + * TODO: In the future, this is something that should be handled + * by the particular chunk index rather than manually + * here. Likely, the chunk index ops should get a new + * callback that accepts a chunk index and provides the + * caller with the scaled coordinates for that chunk. + */ + H5VM_array_calc_pre(chunk_ud.chunk_idx, io_info->dset->shared->ndims, + idx_info->layout->u.earray.swizzled_down_chunks, scaled_coords); + + H5VM_unswizzle_coords(hsize_t, scaled_coords, idx_info->layout->u.earray.unlim_dim); + } + else { + H5VM_array_calc_pre(chunk_ud.chunk_idx, io_info->dset->shared->ndims, + io_info->dset->shared->layout.u.chunk.down_chunks, scaled_coords); + } + + scaled_coords[io_info->dset->shared->ndims] = 0; + +#ifndef NDEBUG + /* + * If a matching local chunk entry is found, the + * `chunk_info` structure (which contains the chunk's + * pre-computed scaled coordinates) will be valid + * for this rank. Compare those coordinates against + * the calculated coordinates above to make sure + * they match. + */ + for (size_t dbg_idx = 0; dbg_idx < chunk_list_num_entries; dbg_idx++) { + if (coll_entry->index_info.chunk_idx == chunk_list[dbg_idx].index_info.chunk_idx) { + hbool_t coords_match = !HDmemcmp(scaled_coords, chunk_list[dbg_idx].chunk_info->scaled, + io_info->dset->shared->ndims * sizeof(hsize_t)); + + HDassert(coords_match && "Calculated scaled coordinates for chunk didn't match " + "chunk's actual scaled coordinates!"); + break; + } + } +#endif + + if ((idx_info->storage->ops->insert)(idx_info, &chunk_ud, io_info->dset) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk address into index") + } + +done: + H5MM_free(gathered_array); + H5MM_free(counts_disps_array); + + if (send_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&send_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (recv_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&recv_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + +#ifdef H5Dmpio_DEBUG + H5D_MPIO_TIME_STOP(mpi_rank); + H5D_MPIO_TRACE_EXIT(mpi_rank); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_collective_filtered_chunk_reinsert() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_get_chunk_redistribute_info_types + * + * Purpose: Constructs MPI derived datatypes for communicating the + * info from a H5D_filtered_collective_io_info_t structure + * that is necessary for redistributing shared chunks during a + * collective write of filtered chunks. + * + * The datatype returned through `contig_type` has an extent + * equal to the size of an H5D_chunk_redistribute_info_t + * structure and is suitable for communicating that structure + * type. + * + * The datatype returned through `resized_type` has an extent + * equal to the size of an H5D_filtered_collective_io_info_t + * structure. This makes it suitable for sending an array of + * those structures, while extracting out just the info + * necessary for the chunk redistribution operation during + * communication. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived, + MPI_Datatype *resized_type, hbool_t *resized_type_derived) +{ + MPI_Datatype struct_type = MPI_DATATYPE_NULL; + hbool_t struct_type_derived = FALSE; + MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL; + hbool_t chunk_block_type_derived = FALSE; + MPI_Datatype types[5]; + MPI_Aint displacements[5]; + int block_lengths[5]; + int field_count; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(contig_type); + HDassert(contig_type_derived); + HDassert(resized_type); + HDassert(resized_type_derived); + + *contig_type_derived = FALSE; + *resized_type_derived = FALSE; + + /* Create struct type for the inner H5F_block_t structure */ + if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description") + + field_count = 5; + HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype))); + + /* + * Create structure type to pack chunk H5F_block_t structure + * next to chunk_idx, orig_owner, new_owner and num_writers + * fields + */ + block_lengths[0] = 1; + block_lengths[1] = 1; + block_lengths[2] = 1; + block_lengths[3] = 1; + block_lengths[4] = 1; + displacements[0] = offsetof(H5D_chunk_redistribute_info_t, chunk_block); + displacements[1] = offsetof(H5D_chunk_redistribute_info_t, chunk_idx); + displacements[2] = offsetof(H5D_chunk_redistribute_info_t, orig_owner); + displacements[3] = offsetof(H5D_chunk_redistribute_info_t, new_owner); + displacements[4] = offsetof(H5D_chunk_redistribute_info_t, num_writers); + types[0] = chunk_block_type; + types[1] = HSIZE_AS_MPI_TYPE; + types[2] = MPI_INT; + types[3] = MPI_INT; + types[4] = MPI_INT; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + *contig_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + /* Create struct type to extract the chunk_current, chunk_idx, orig_owner, + * new_owner and num_writers fields from a H5D_filtered_collective_io_info_t + * structure + */ + block_lengths[0] = 1; + block_lengths[1] = 1; + block_lengths[2] = 1; + block_lengths[3] = 1; + block_lengths[4] = 1; + displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_current); + displacements[1] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx); + displacements[2] = offsetof(H5D_filtered_collective_io_info_t, orig_owner); + displacements[3] = offsetof(H5D_filtered_collective_io_info_t, new_owner); + displacements[4] = offsetof(H5D_filtered_collective_io_info_t, num_writers); + types[0] = chunk_block_type; + types[1] = HSIZE_AS_MPI_TYPE; + types[2] = MPI_INT; + types[3] = MPI_INT; + types[4] = MPI_INT; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + struct_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized( + struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code) + *resized_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + if (struct_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (chunk_block_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + + if (ret_value < 0) { + if (*resized_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *resized_type_derived = FALSE; + } + if (*contig_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *contig_type_derived = FALSE; + } + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_get_chunk_redistribute_info_types() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_get_chunk_alloc_info_types + * + * Purpose: Constructs MPI derived datatypes for communicating the info + * from a H5D_filtered_collective_io_info_t structure that is + * necessary for re-allocating file space during a collective + * write of filtered chunks. + * + * The datatype returned through `contig_type` has an extent + * equal to the size of an H5D_chunk_alloc_info_t structure + * and is suitable for communicating that structure type. + * + * The datatype returned through `resized_type` has an extent + * equal to the size of an H5D_filtered_collective_io_info_t + * structure. This makes it suitable for sending an array of + * those structures, while extracting out just the info + * necessary for the chunk file space reallocation operation + * during communication. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived, + MPI_Datatype *resized_type, hbool_t *resized_type_derived) +{ + MPI_Datatype struct_type = MPI_DATATYPE_NULL; + hbool_t struct_type_derived = FALSE; + MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL; + hbool_t chunk_block_type_derived = FALSE; + MPI_Datatype types[3]; + MPI_Aint displacements[3]; + int block_lengths[3]; + int field_count; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(contig_type); + HDassert(contig_type_derived); + HDassert(resized_type); + HDassert(resized_type_derived); + + *contig_type_derived = FALSE; + *resized_type_derived = FALSE; + + /* Create struct type for the inner H5F_block_t structure */ + if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description") + + field_count = 3; + HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype))); + + /* + * Create structure type to pack both chunk H5F_block_t structures + * next to chunk_idx field + */ + block_lengths[0] = 1; + block_lengths[1] = 1; + block_lengths[2] = 1; + displacements[0] = offsetof(H5D_chunk_alloc_info_t, chunk_current); + displacements[1] = offsetof(H5D_chunk_alloc_info_t, chunk_new); + displacements[2] = offsetof(H5D_chunk_alloc_info_t, chunk_idx); + types[0] = chunk_block_type; + types[1] = chunk_block_type; + types[2] = HSIZE_AS_MPI_TYPE; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + *contig_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + /* + * Create struct type to extract the chunk_current, chunk_new and chunk_idx + * fields from a H5D_filtered_collective_io_info_t structure + */ + block_lengths[0] = 1; + block_lengths[1] = 1; + block_lengths[2] = 1; + displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_current); + displacements[1] = offsetof(H5D_filtered_collective_io_info_t, chunk_new); + displacements[2] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx); + types[0] = chunk_block_type; + types[1] = chunk_block_type; + types[2] = HSIZE_AS_MPI_TYPE; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + struct_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized( + struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code) + *resized_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + if (struct_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (chunk_block_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + + if (ret_value < 0) { + if (*resized_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *resized_type_derived = FALSE; + } + if (*contig_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *contig_type_derived = FALSE; + } + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_get_chunk_alloc_info_types() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_get_chunk_insert_info_types + * + * Purpose: Constructs MPI derived datatypes for communicating the + * information necessary when reinserting chunks into a + * dataset's chunk index. This includes the chunk's new offset + * and size (H5F_block_t) and the inner `index_info` structure + * of a H5D_filtered_collective_io_info_t structure. + * + * The datatype returned through `contig_type` has an extent + * equal to the size of an H5D_chunk_insert_info_t structure + * and is suitable for communicating that structure type. + * + * The datatype returned through `resized_type` has an extent + * equal to the size of the encompassing + * H5D_filtered_collective_io_info_t structure. This makes it + * suitable for sending an array of + * H5D_filtered_collective_io_info_t structures, while + * extracting out just the information needed during + * communication. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived, + MPI_Datatype *resized_type, hbool_t *resized_type_derived) +{ + MPI_Datatype struct_type = MPI_DATATYPE_NULL; + hbool_t struct_type_derived = FALSE; + MPI_Datatype chunk_block_type = MPI_DATATYPE_NULL; + hbool_t chunk_block_type_derived = FALSE; + MPI_Aint contig_type_extent; + MPI_Datatype types[4]; + MPI_Aint displacements[4]; + int block_lengths[4]; + int field_count; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(contig_type); + HDassert(contig_type_derived); + HDassert(resized_type); + HDassert(resized_type_derived); + + *contig_type_derived = FALSE; + *resized_type_derived = FALSE; + + /* Create struct type for an H5F_block_t structure */ + if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description") + + field_count = 4; + HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype))); + + /* + * Create struct type to pack information into memory as follows: + * + * Chunk's new Offset/Size (H5F_block_t) -> + * Chunk Index Info (H5D_chunk_index_info_t) + */ + block_lengths[0] = 1; + block_lengths[1] = 1; + block_lengths[2] = 1; + block_lengths[3] = 1; + displacements[0] = offsetof(H5D_chunk_insert_info_t, chunk_block); + displacements[1] = offsetof(H5D_chunk_insert_info_t, index_info.chunk_idx); + displacements[2] = offsetof(H5D_chunk_insert_info_t, index_info.filter_mask); + displacements[3] = offsetof(H5D_chunk_insert_info_t, index_info.need_insert); + types[0] = chunk_block_type; + types[1] = HSIZE_AS_MPI_TYPE; + types[2] = MPI_UNSIGNED; + types[3] = MPI_C_BOOL; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + struct_type_derived = TRUE; + + contig_type_extent = (MPI_Aint)(sizeof(H5F_block_t) + sizeof(H5D_chunk_index_info_t)); + + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(struct_type, 0, contig_type_extent, contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code) + *contig_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + struct_type_derived = FALSE; + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + + /* + * Create struct type to correctly extract all needed + * information from a H5D_filtered_collective_io_info_t + * structure. + */ + displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_new); + displacements[1] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx); + displacements[2] = offsetof(H5D_filtered_collective_io_info_t, index_info.filter_mask); + displacements[3] = offsetof(H5D_filtered_collective_io_info_t, index_info.need_insert); + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + struct_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized( + struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code) + *resized_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + if (struct_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + if (chunk_block_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } + + if (ret_value < 0) { + if (*resized_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *resized_type_derived = FALSE; + } + if (*contig_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *contig_type_derived = FALSE; + } + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_get_chunk_insert_info_types() */ + +/*------------------------------------------------------------------------- + * Function: H5D__mpio_collective_filtered_io_type + * + * Purpose: Constructs a MPI derived datatype for both the memory and + * the file for a collective I/O operation on filtered chunks. + * The datatype contains the chunk offsets and lengths in the + * file and the locations of the chunk data buffers to read + * into/write from. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +static herr_t +H5D__mpio_collective_filtered_io_type(H5D_filtered_collective_io_info_t *chunk_list, size_t num_entries, + H5D_io_op_type_t op_type, MPI_Datatype *new_mem_type, + hbool_t *mem_type_derived, MPI_Datatype *new_file_type, + hbool_t *file_type_derived) +{ + MPI_Aint *io_buf_array = NULL; /* Relative displacements of filtered chunk data buffers */ + MPI_Aint *file_offset_array = NULL; /* Chunk offsets in the file */ + int * length_array = NULL; /* Filtered Chunk lengths */ + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + HDassert(chunk_list || 0 == num_entries); + HDassert(new_mem_type); + HDassert(mem_type_derived); + HDassert(new_file_type); + HDassert(file_type_derived); + + *mem_type_derived = FALSE; + *file_type_derived = FALSE; + *new_mem_type = MPI_BYTE; + *new_file_type = MPI_BYTE; + + if (num_entries > 0) { + H5F_block_t *chunk_block; + size_t last_valid_idx = 0; + size_t i; + int chunk_count; + + /* + * Determine number of chunks for I/O operation and + * setup for derived datatype creation if I/O operation + * includes multiple chunks + */ + if (num_entries == 1) { + /* Set last valid index to 0 for contiguous datatype creation */ + last_valid_idx = 0; + + if (op_type == H5D_IO_OP_WRITE) + chunk_count = 1; + else + chunk_count = chunk_list[0].need_read ? 1 : 0; + } + else { + MPI_Aint chunk_buf; + MPI_Aint base_buf; + haddr_t base_offset = HADDR_UNDEF; + + H5_CHECK_OVERFLOW(num_entries, size_t, int); + + /* Allocate arrays */ + if (NULL == (length_array = H5MM_malloc((size_t)num_entries * sizeof(int)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for filtered collective I/O length array") + if (NULL == (io_buf_array = H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for filtered collective I/O buf length array") + if (NULL == (file_offset_array = H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for filtered collective I/O offset array") + + /* + * If doing a write, we can set the base chunk offset + * and base chunk data buffer right away. + * + * If doing a read, some chunks may be skipped over + * for reading if they aren't yet allocated in the + * file. Therefore, we have to find the first chunk + * actually being read in order to set the base chunk + * offset and base chunk data buffer. + */ + if (op_type == H5D_IO_OP_WRITE) { +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[0].buf, &base_buf))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code) +#else + base_buf = (MPI_Aint)chunk_list[0].buf; +#endif + + base_offset = chunk_list[0].chunk_new.offset; + } + + for (i = 0, chunk_count = 0; i < num_entries; i++) { + if (op_type == H5D_IO_OP_READ) { + /* + * If this chunk isn't being read, don't add it + * to the MPI type we're building up for I/O + */ + if (!chunk_list[i].need_read) + continue; + + /* + * If this chunk is being read, go ahead and + * set the base chunk offset and base chunk + * data buffer if we haven't already + */ + if (!H5F_addr_defined(base_offset)) { +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[i].buf, &base_buf))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code) +#else + base_buf = (MPI_Aint)chunk_list[i].buf; +#endif + + base_offset = chunk_list[i].chunk_current.offset; + } + } + + /* Set convenience pointer for current chunk block */ + chunk_block = + (op_type == H5D_IO_OP_READ) ? &chunk_list[i].chunk_current : &chunk_list[i].chunk_new; + + /* + * Set the current chunk entry's offset in the file, relative to + * the first chunk entry + */ + HDassert(H5F_addr_defined(chunk_block->offset)); + file_offset_array[chunk_count] = (MPI_Aint)(chunk_block->offset - base_offset); + + /* + * Ensure the chunk list is sorted in ascending ordering of + * offset in the file + */ + if (chunk_count) + HDassert(file_offset_array[chunk_count] > file_offset_array[chunk_count - 1]); + + /* Set the current chunk entry's size for the I/O operation */ + H5_CHECK_OVERFLOW(chunk_block->length, hsize_t, int); + length_array[chunk_count] = (int)chunk_block->length; + + /* + * Set the displacement of the chunk entry's chunk data buffer, + * relative to the first entry's data buffer + */ +#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1 + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[i].buf, &chunk_buf))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code) + + io_buf_array[chunk_count] = MPI_Aint_diff(chunk_buf, base_buf); +#else + chunk_buf = (MPI_Aint)chunk_list[i].buf; + io_buf_array[chunk_count] = chunk_buf - base_buf; +#endif + + /* + * Set last valid index in case only a single chunk will + * be involved in the I/O operation + */ + last_valid_idx = i; + + chunk_count++; + } /* end for */ + } + + /* + * Create derived datatypes for the chunk list if this + * rank has any chunks to work on + */ + if (chunk_count > 0) { + if (chunk_count == 1) { + int chunk_len; + + /* Single chunk - use a contiguous type for both memory and file */ + + /* Ensure that we can cast chunk size to an int for MPI */ + chunk_block = (op_type == H5D_IO_OP_READ) ? &chunk_list[last_valid_idx].chunk_current + : &chunk_list[last_valid_idx].chunk_new; + H5_CHECKED_ASSIGN(chunk_len, int, chunk_block->length, hsize_t); + + if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(chunk_len, MPI_BYTE, new_file_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + *new_mem_type = *new_file_type; + + /* + * Since we use the same datatype for both memory and file, only + * mark the file type as derived so the caller doesn't try to + * free the same type twice + */ + *mem_type_derived = FALSE; + *file_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + } + else { + HDassert(file_offset_array); + HDassert(length_array); + HDassert(io_buf_array); + + /* Multiple chunks - use an hindexed type for both memory and file */ + + /* Create memory MPI type */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed( + chunk_count, length_array, io_buf_array, MPI_BYTE, new_mem_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + *mem_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_mem_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + + /* Create file MPI type */ + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_hindexed(chunk_count, length_array, file_offset_array, + MPI_BYTE, new_file_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + *file_type_derived = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + } + } + } /* end if */ done: - if (chunk_entry->async_info.receive_buffer_array) - H5MM_free(chunk_entry->async_info.receive_buffer_array); - if (chunk_entry->async_info.receive_requests_array) - H5MM_free(chunk_entry->async_info.receive_requests_array); - if (tmp_gath_buf) - H5MM_free(tmp_gath_buf); - if (file_iter_init && H5S_SELECT_ITER_RELEASE(file_iter) < 0) - HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - if (file_iter) - H5MM_free(file_iter); - if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0) - HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator") - if (mem_iter) - H5MM_free(mem_iter); - if (dataspace) - if (H5S_close(dataspace) < 0) - HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace") + if (file_offset_array) + H5MM_free(file_offset_array); + if (io_buf_array) + H5MM_free(io_buf_array); + if (length_array) + H5MM_free(length_array); + + if (ret_value < 0) { + if (*file_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_file_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *file_type_derived = FALSE; + } + if (*mem_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_mem_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *mem_type_derived = FALSE; + } + } FUNC_LEAVE_NOAPI(ret_value) -} /* end H5D__filtered_collective_chunk_entry_io() */ +} /* end H5D__mpio_collective_filtered_io_type() */ + +#ifdef H5Dmpio_DEBUG + +static herr_t +H5D__mpio_dump_collective_filtered_chunk_list(H5D_filtered_collective_io_info_t *chunk_list, + size_t chunk_list_num_entries, int mpi_rank) +{ + H5D_filtered_collective_io_info_t *chunk_entry; + size_t i; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC_NOERR + + H5D_MPIO_DEBUG(mpi_rank, "CHUNK LIST: ["); + for (i = 0; i < chunk_list_num_entries; i++) { + unsigned chunk_rank; + + chunk_entry = &chunk_list[i]; + + HDassert(chunk_entry->chunk_info); + chunk_rank = (unsigned)H5S_GET_EXTENT_NDIMS(chunk_entry->chunk_info->fspace); + + H5D_MPIO_DEBUG(mpi_rank, " {"); + H5D_MPIO_DEBUG_VA(mpi_rank, " - Entry %zu -", i); + + H5D_MPIO_DEBUG(mpi_rank, " - Chunk Fspace Info -"); + H5D_MPIO_DEBUG_VA(mpi_rank, + " Chunk Current Info: { Offset: %" PRIuHADDR ", Length: %" PRIuHADDR " }", + chunk_entry->chunk_current.offset, chunk_entry->chunk_current.length); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk New Info: { Offset: %" PRIuHADDR ", Length: %" PRIuHADDR " }", + chunk_entry->chunk_new.offset, chunk_entry->chunk_new.length); + + H5D_MPIO_DEBUG(mpi_rank, " - Chunk Insert Info -"); + H5D_MPIO_DEBUG_VA(mpi_rank, + " Chunk Scaled Coords (4-d): { %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE + ", %" PRIuHSIZE " }", + chunk_rank < 1 ? 0 : chunk_entry->chunk_info->scaled[0], + chunk_rank < 2 ? 0 : chunk_entry->chunk_info->scaled[1], + chunk_rank < 3 ? 0 : chunk_entry->chunk_info->scaled[2], + chunk_rank < 4 ? 0 : chunk_entry->chunk_info->scaled[3]); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk Index: %" PRIuHSIZE, chunk_entry->index_info.chunk_idx); + H5D_MPIO_DEBUG_VA(mpi_rank, " Filter Mask: %u", chunk_entry->index_info.filter_mask); + H5D_MPIO_DEBUG_VA(mpi_rank, " Need Insert: %s", + chunk_entry->index_info.need_insert ? "YES" : "NO"); + + H5D_MPIO_DEBUG(mpi_rank, " - Other Info -"); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk Info Ptr: %p", (void *)chunk_entry->chunk_info); + H5D_MPIO_DEBUG_VA(mpi_rank, " Need Read: %s", chunk_entry->need_read ? "YES" : "NO"); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk I/O Size: %zu", chunk_entry->io_size); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk Buffer Size: %zu", chunk_entry->chunk_buf_size); + H5D_MPIO_DEBUG_VA(mpi_rank, " Original Owner: %d", chunk_entry->orig_owner); + H5D_MPIO_DEBUG_VA(mpi_rank, " New Owner: %d", chunk_entry->new_owner); + H5D_MPIO_DEBUG_VA(mpi_rank, " # of Writers: %d", chunk_entry->num_writers); + H5D_MPIO_DEBUG_VA(mpi_rank, " Chunk Data Buffer Ptr: %p", (void *)chunk_entry->buf); + + H5D_MPIO_DEBUG(mpi_rank, " }"); + } + H5D_MPIO_DEBUG(mpi_rank, "]"); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__mpio_dump_collective_filtered_chunk_list() */ + +#endif + #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index 49c95a5..a424929 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -559,6 +559,7 @@ H5_DLL herr_t H5D__alloc_storage(const H5D_io_info_t *io_info, H5D_time_alloc_t hbool_t full_overwrite, hsize_t old_dim[]); H5_DLL herr_t H5D__get_storage_size(const H5D_t *dset, hsize_t *storage_size); H5_DLL herr_t H5D__get_chunk_storage_size(H5D_t *dset, const hsize_t *offset, hsize_t *storage_size); +H5_DLL herr_t H5D__chunk_index_empty(const H5D_t *dset, hbool_t *empty); H5_DLL herr_t H5D__get_num_chunks(const H5D_t *dset, const H5S_t *space, hsize_t *nchunks); H5_DLL herr_t H5D__get_chunk_info(const H5D_t *dset, const H5S_t *space, hsize_t chk_idx, hsize_t *coord, unsigned *filter_mask, haddr_t *offset, hsize_t *size); @@ -591,6 +592,10 @@ H5_DLL herr_t H5D__select_read(const H5D_io_info_t *io_info, const H5D_type_info H5_DLL herr_t H5D__select_write(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, H5S_t *file_space, H5S_t *mem_space); +/* Functions that perform direct copying between memory buffers */ +H5_DLL herr_t H5D_select_io_mem(void *dst_buf, const H5S_t *dst_space, const void *src_buf, + const H5S_t *src_space, size_t elmt_size, size_t nelmts); + /* Functions that perform scatter-gather serial I/O operations */ H5_DLL herr_t H5D__scatter_mem(const void *_tscat_buf, H5S_sel_iter_t *iter, size_t nelmts, void *_buf); H5_DLL size_t H5D__gather_mem(const void *_buf, H5S_sel_iter_t *iter, size_t nelmts, @@ -635,7 +640,13 @@ H5_DLL herr_t H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_ov const hsize_t old_dim[]); H5_DLL herr_t H5D__chunk_file_alloc(const H5D_chk_idx_info_t *idx_info, const H5F_block_t *old_chunk, H5F_block_t *new_chunk, hbool_t *need_insert, const hsize_t *scaled); +H5_DLL void * H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline); +H5_DLL void H5D__chunk_mem_free(void *chk, const void *_pline); +H5_DLL void * H5D__chunk_mem_xfree(void *chk, const void *pline); +H5_DLL void * H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline); H5_DLL herr_t H5D__chunk_update_old_edge_chunks(H5D_t *dset, hsize_t old_dim[]); +H5_DLL hbool_t H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims, + const hsize_t *chunk_scaled, const hsize_t *dset_dims); H5_DLL herr_t H5D__chunk_prune_by_extent(H5D_t *dset, const hsize_t *old_dim); H5_DLL herr_t H5D__chunk_set_sizes(H5D_t *dset); #ifdef H5_HAVE_PARALLEL @@ -694,11 +705,11 @@ H5_DLL herr_t H5D__fill_term(H5D_fill_buf_info_t *fb_info); #ifdef H5_HAVE_PARALLEL -#ifdef H5S_DEBUG +#ifdef H5D_DEBUG #ifndef H5Dmpio_DEBUG #define H5Dmpio_DEBUG #endif /*H5Dmpio_DEBUG*/ -#endif /*H5S_DEBUG*/ +#endif /*H5D_DEBUG*/ /* MPI-IO function to read, it will select either regular or irregular read */ H5_DLL herr_t H5D__mpio_select_read(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, H5S_t *file_space, H5S_t *mem_space); @@ -727,6 +738,8 @@ H5_DLL herr_t H5D__chunk_collective_write(H5D_io_info_t *io_info, const H5D_type * memory and the file */ H5_DLL htri_t H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, const H5S_t *mem_space, const H5D_type_info_t *type_info); +H5_DLL herr_t H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len, + char *global_cause, size_t global_cause_len); #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Dselect.c b/src/H5Dselect.c index e64d657..f464ca5 100644 --- a/src/H5Dselect.c +++ b/src/H5Dselect.c @@ -105,6 +105,9 @@ H5D__select_io(const H5D_io_info_t *io_info, size_t elmt_size, size_t nelmts, H5 HDassert(io_info->store); HDassert(io_info->u.rbuf); + if (elmt_size == 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid elmt_size of 0") + /* Check for only one element in selection */ if (nelmts == 1) { hsize_t single_mem_off; /* Offset in memory */ @@ -226,8 +229,6 @@ H5D__select_io(const H5D_io_info_t *io_info, size_t elmt_size, size_t nelmts, H5 /* Decrement number of elements left to process */ HDassert(((size_t)tmp_file_len % elmt_size) == 0); - if (elmt_size == 0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "Resulted in division by zero") nelmts -= ((size_t)tmp_file_len / elmt_size); } /* end while */ } /* end else */ @@ -257,6 +258,188 @@ done: } /* end H5D__select_io() */ /*------------------------------------------------------------------------- + * Function: H5D_select_io_mem + * + * Purpose: Perform memory copies directly between two memory buffers + * according to the selections in the `dst_space` and + * `src_space` dataspaces. + * + * Note: This routine is [basically] the same as H5D__select_io, + * with the only difference being that the readvv/writevv + * calls are exchanged for H5VM_memcpyvv calls. Changes should + * be made to both routines. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5D_select_io_mem(void *dst_buf, const H5S_t *dst_space, const void *src_buf, const H5S_t *src_space, + size_t elmt_size, size_t nelmts) +{ + H5S_sel_iter_t *dst_sel_iter = NULL; /* Destination dataspace iteration info */ + H5S_sel_iter_t *src_sel_iter = NULL; /* Source dataspace iteration info */ + hbool_t dst_sel_iter_init = FALSE; /* Destination dataspace selection iterator initialized? */ + hbool_t src_sel_iter_init = FALSE; /* Source dataspace selection iterator initialized? */ + hsize_t * dst_off = NULL; /* Pointer to sequence offsets in destination buffer */ + hsize_t * src_off = NULL; /* Pointer to sequence offsets in source buffer */ + size_t * dst_len = NULL; /* Pointer to sequence lengths in destination buffer */ + size_t * src_len = NULL; /* Pointer to sequence lengths in source buffer */ + size_t curr_dst_seq; /* Current destination buffer sequence to operate on */ + size_t curr_src_seq; /* Current source buffer sequence to operate on */ + size_t dst_nseq; /* Number of sequences generated for destination buffer */ + size_t src_nseq; /* Number of sequences generated for source buffer */ + size_t dxpl_vec_size; /* Vector length from API context's DXPL */ + size_t vec_size; /* Vector length */ + ssize_t bytes_copied; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(dst_buf); + HDassert(dst_space); + HDassert(src_buf); + HDassert(src_space); + + if (elmt_size == 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid elmt_size of 0") + + /* Check for only one element in selection */ + if (nelmts == 1) { + hsize_t single_dst_off; /* Offset in dst_space */ + hsize_t single_src_off; /* Offset in src_space */ + size_t single_dst_len; /* Length in dst_space */ + size_t single_src_len; /* Length in src_space */ + + /* Get offset of first element in selections */ + if (H5S_SELECT_OFFSET(dst_space, &single_dst_off) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve destination selection offset") + if (H5S_SELECT_OFFSET(src_space, &single_src_off) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve source selection offset") + + /* Set up necessary information for I/O operation */ + dst_nseq = src_nseq = 1; + curr_dst_seq = curr_src_seq = 0; + single_dst_off *= elmt_size; + single_src_off *= elmt_size; + single_dst_len = single_src_len = elmt_size; + + /* Perform vectorized memcpy from src_buf to dst_buf */ + if ((bytes_copied = + H5VM_memcpyvv(dst_buf, dst_nseq, &curr_dst_seq, &single_dst_len, &single_dst_off, src_buf, + src_nseq, &curr_src_seq, &single_src_len, &single_src_off)) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "vectorized memcpy failed") + + HDassert(((size_t)bytes_copied % elmt_size) == 0); + } + else { + unsigned sel_iter_flags = H5S_SEL_ITER_GET_SEQ_LIST_SORTED | H5S_SEL_ITER_SHARE_WITH_DATASPACE; + size_t dst_nelem; /* Number of elements used in destination buffer sequences */ + size_t src_nelem; /* Number of elements used in source buffer sequences */ + + /* Get info from API context */ + if (H5CX_get_vec_size(&dxpl_vec_size) < 0) + HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "can't retrieve I/O vector size") + + /* Allocate the vector I/O arrays */ + if (dxpl_vec_size > H5D_IO_VECTOR_SIZE) + vec_size = dxpl_vec_size; + else + vec_size = H5D_IO_VECTOR_SIZE; + + if (NULL == (dst_len = H5FL_SEQ_MALLOC(size_t, vec_size))) + HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O length vector array") + if (NULL == (dst_off = H5FL_SEQ_MALLOC(hsize_t, vec_size))) + HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O offset vector array") + if (NULL == (src_len = H5FL_SEQ_MALLOC(size_t, vec_size))) + HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O length vector array") + if (NULL == (src_off = H5FL_SEQ_MALLOC(hsize_t, vec_size))) + HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O offset vector array") + + /* Allocate the dataspace selection iterators */ + if (NULL == (dst_sel_iter = H5FL_MALLOC(H5S_sel_iter_t))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate destination selection iterator") + if (NULL == (src_sel_iter = H5FL_MALLOC(H5S_sel_iter_t))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate source selection iterator") + + /* Initialize destination selection iterator */ + if (H5S_select_iter_init(dst_sel_iter, dst_space, elmt_size, sel_iter_flags) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + dst_sel_iter_init = TRUE; /* Destination selection iteration info has been initialized */ + + /* Initialize source selection iterator */ + if (H5S_select_iter_init(src_sel_iter, src_space, elmt_size, H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") + src_sel_iter_init = TRUE; /* Source selection iteration info has been initialized */ + + /* Initialize sequence counts */ + curr_dst_seq = curr_src_seq = 0; + dst_nseq = src_nseq = 0; + + /* Loop, until all bytes are processed */ + while (nelmts > 0) { + /* Check if more destination buffer sequences are needed */ + if (curr_dst_seq >= dst_nseq) { + /* Get sequences for destination selection */ + if (H5S_SELECT_ITER_GET_SEQ_LIST(dst_sel_iter, vec_size, nelmts, &dst_nseq, &dst_nelem, + dst_off, dst_len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + + /* Start at the beginning of the sequences again */ + curr_dst_seq = 0; + } + + /* Check if more source buffer sequences are needed */ + if (curr_src_seq >= src_nseq) { + /* Get sequences for source selection */ + if (H5S_SELECT_ITER_GET_SEQ_LIST(src_sel_iter, vec_size, nelmts, &src_nseq, &src_nelem, + src_off, src_len) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed") + + /* Start at the beginning of the sequences again */ + curr_src_seq = 0; + } /* end if */ + + /* Perform vectorized memcpy from src_buf to dst_buf */ + if ((bytes_copied = H5VM_memcpyvv(dst_buf, dst_nseq, &curr_dst_seq, dst_len, dst_off, src_buf, + src_nseq, &curr_src_seq, src_len, src_off)) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "vectorized memcpy failed") + + /* Decrement number of elements left to process */ + HDassert(((size_t)bytes_copied % elmt_size) == 0); + nelmts -= ((size_t)bytes_copied / elmt_size); + } + } + +done: + /* Release selection iterators */ + if (src_sel_iter) { + if (src_sel_iter_init && H5S_SELECT_ITER_RELEASE(src_sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + src_sel_iter = H5FL_FREE(H5S_sel_iter_t, src_sel_iter); + } + if (dst_sel_iter) { + if (dst_sel_iter_init && H5S_SELECT_ITER_RELEASE(dst_sel_iter) < 0) + HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") + + dst_sel_iter = H5FL_FREE(H5S_sel_iter_t, dst_sel_iter); + } + + /* Release vector arrays, if allocated */ + if (src_off) + src_off = H5FL_SEQ_FREE(hsize_t, src_off); + if (src_len) + src_len = H5FL_SEQ_FREE(size_t, src_len); + if (dst_off) + dst_off = H5FL_SEQ_FREE(hsize_t, dst_off); + if (dst_len) + dst_len = H5FL_SEQ_FREE(size_t, dst_len); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D_select_io_mem() */ + +/*------------------------------------------------------------------------- * Function: H5D__select_read * * Purpose: Reads directly from file into application memory. diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 1969899..4aa8a96 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -188,6 +188,41 @@ H5FD__mpio_parse_debug_str(const char *s) FUNC_LEAVE_NOAPI_VOID } /* end H5FD__mpio_parse_debug_str() */ + +/*--------------------------------------------------------------------------- + * Function: H5FD__mem_t_to_str + * + * Purpose: Returns a string representing the enum value in an H5FD_mem_t + * enum + * + * Returns: H5FD_mem_t enum value string + * + *--------------------------------------------------------------------------- + */ +static const char * +H5FD__mem_t_to_str(H5FD_mem_t mem_type) +{ + switch (mem_type) { + case H5FD_MEM_NOLIST: + return "H5FD_MEM_NOLIST"; + case H5FD_MEM_DEFAULT: + return "H5FD_MEM_DEFAULT"; + case H5FD_MEM_SUPER: + return "H5FD_MEM_SUPER"; + case H5FD_MEM_BTREE: + return "H5FD_MEM_BTREE"; + case H5FD_MEM_DRAW: + return "H5FD_MEM_DRAW"; + case H5FD_MEM_GHEAP: + return "H5FD_MEM_GHEAP"; + case H5FD_MEM_LHEAP: + return "H5FD_MEM_LHEAP"; + case H5FD_MEM_OHDR: + return "H5FD_MEM_OHDR"; + default: + return "(Unknown)"; + } +} #endif /* H5FDmpio_DEBUG */ /*------------------------------------------------------------------------- @@ -994,7 +1029,6 @@ H5FD__mpio_query(const H5FD_t H5_ATTR_UNUSED *_file, unsigned long *flags /* out *flags |= H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */ *flags |= H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */ *flags |= H5FD_FEAT_HAS_MPI; /* This driver uses MPI */ - *flags |= H5FD_FEAT_ALLOCATE_EARLY; /* Allocate space early instead of late */ *flags |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE; /* VFD creates a file which can be opened with the default VFD */ } /* end if */ @@ -1380,8 +1414,8 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU #ifdef H5FDmpio_DEBUG if (H5FD_mpio_debug_r_flag) - HDfprintf(stderr, "%s: (%d) mpi_off = %ld bytes_read = %lld\n", __func__, file->mpi_rank, - (long)mpi_off, bytes_read); + HDfprintf(stderr, "%s: (%d) mpi_off = %ld bytes_read = %lld type = %s\n", __func__, file->mpi_rank, + (long)mpi_off, bytes_read, H5FD__mem_t_to_str(type)); #endif /* @@ -1601,8 +1635,8 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, h #ifdef H5FDmpio_DEBUG if (H5FD_mpio_debug_w_flag) - HDfprintf(stderr, "%s: (%d) mpi_off = %ld bytes_written = %lld\n", __func__, file->mpi_rank, - (long)mpi_off, bytes_written); + HDfprintf(stderr, "%s: (%d) mpi_off = %ld bytes_written = %lld type = %s\n", __func__, + file->mpi_rank, (long)mpi_off, bytes_written, H5FD__mem_t_to_str(type)); #endif /* Each process will keep track of its perceived EOF value locally, and diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index 78290c6..02d8d52 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -524,4 +524,68 @@ H5F_set_coll_metadata_reads(H5F_t *file, H5P_coll_md_read_flag_t *file_flag, hbo FUNC_LEAVE_NOAPI_VOID } /* end H5F_set_coll_metadata_reads() */ +/*------------------------------------------------------------------------- + * Function: H5F_mpi_get_file_block_type + * + * Purpose: Creates an MPI derived datatype for communicating an + * H5F_block_t structure. If `commit` is specified as TRUE, + * the resulting datatype will be committed and ready for + * use in communication. Otherwise, the type is only suitable + * for building other derived types. + * + * If TRUE is returned through `new_type_derived`, this lets + * the caller know that the datatype has been derived and + * should be freed with MPI_Type_free once it is no longer + * needed. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5F_mpi_get_file_block_type(hbool_t commit, MPI_Datatype *new_type, hbool_t *new_type_derived) +{ + MPI_Datatype types[2]; + MPI_Aint displacements[2]; + int block_lengths[2]; + int field_count; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(new_type); + HDassert(new_type_derived); + + *new_type_derived = FALSE; + + field_count = 2; + HDassert(field_count == sizeof(types) / sizeof(MPI_Datatype)); + + block_lengths[0] = 1; + block_lengths[1] = 1; + displacements[0] = offsetof(H5F_block_t, offset); + displacements[1] = offsetof(H5F_block_t, length); + types[0] = HADDR_AS_MPI_TYPE; + types[1] = HSIZE_AS_MPI_TYPE; + if (MPI_SUCCESS != + (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + *new_type_derived = TRUE; + + if (commit && MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + if (ret_value < 0) { + if (*new_type_derived) { + if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_type))) + HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + *new_type_derived = FALSE; + } + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_mpi_get_file_block_type() */ + #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index af65c9d..67e153e 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -962,7 +962,8 @@ H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); H5_DLL int H5F_shared_mpi_get_size(const H5F_shared_t *f_sh); H5_DLL int H5F_mpi_get_size(const H5F_t *f); H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm); -H5_DLL hbool_t H5F_get_coll_metadata_reads(const H5F_t *f); +H5_DLL herr_t H5F_mpi_get_file_block_type(hbool_t commit, MPI_Datatype *new_type, hbool_t *new_type_derived); +H5_DLL hbool_t H5F_get_coll_metadata_reads(const H5F_t *f); H5_DLL void H5F_set_coll_metadata_reads(H5F_t *f, H5P_coll_md_read_flag_t *file_flag, hbool_t *context_flag); #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5mpi.c b/src/H5mpi.c index aea0104..15fb785 100644 --- a/src/H5mpi.c +++ b/src/H5mpi.c @@ -549,4 +549,237 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5_mpio_create_large_type() */ +/*------------------------------------------------------------------------- + * Function: H5_mpio_gatherv_alloc + * + * Purpose: A wrapper around MPI_(All)gatherv that performs allocation + * of the receive buffer on the caller's behalf. This + * routine's parameters are as follows: + * + * `send_buf` - The buffer that data will be sent from for + * the calling MPI rank. Analogous to + * MPI_(All)gatherv's `sendbuf` parameter. + * + * `send_count` - The number of `send_type` elements in the + * send buffer. Analogous to MPI_(All)gatherv's + * `sendcount` parameter. + * + * `send_type` - The MPI Datatype of the elements in the send + * buffer. Analogous to MPI_(All)gatherv's + * `sendtype` parameter. + * + * `recv_counts` - An array containing the number of elements + * to be received from each MPI rank. + * Analogous to MPI_(All)gatherv's `recvcount` + * parameter. + * + * `displacements` - An array containing the displacements + * in the receive buffer where data from + * each MPI rank should be placed. Analogous + * to MPI_(All)gatherv's `displs` parameter. + * + * `recv_type` - The MPI Datatype of the elements in the + * receive buffer. Analogous to + * MPI_(All)gatherv's `recvtype` parameter. + * + * `allgather` - Specifies whether the gather operation to be + * performed should be MPI_Allgatherv (TRUE) or + * MPI_Gatherv (FALSE). + * + * `root` - For MPI_Gatherv operations, specifies the rank + * that will receive the data sent by other ranks. + * Analogous to MPI_Gatherv's `root` parameter. For + * MPI_Allgatherv operations, this parameter is + * ignored. + * + * `comm` - Specifies the MPI Communicator for the operation. + * Analogous to MPI_(All)gatherv's `comm` parameter. + * + * `mpi_rank` - Specifies the calling rank's rank value, as + * obtained by calling MPI_Comm_rank on the + * MPI Communicator `comm`. + * + * `mpi_size` - Specifies the MPI Communicator size, as + * obtained by calling MPI_Comm_size on the + * MPI Communicator `comm`. + * + * `out_buf` - Resulting buffer that is allocated and + * returned to the caller after data has been + * gathered into it. Returned only to the rank + * specified by `root` for MPI_Gatherv + * operations, or to all ranks for + * MPI_Allgatherv operations. + * + * `out_buf_num_entries` - The number of elements in the + * resulting buffer, in terms of + * the MPI Datatype provided for + * `recv_type`. + * + * Notes: This routine is collective across `comm`. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5_mpio_gatherv_alloc(void *send_buf, int send_count, MPI_Datatype send_type, const int recv_counts[], + const int displacements[], MPI_Datatype recv_type, hbool_t allgather, int root, + MPI_Comm comm, int mpi_rank, int mpi_size, void **out_buf, size_t *out_buf_num_entries) +{ + size_t recv_buf_num_entries = 0; + void * recv_buf = NULL; +#if MPI_VERSION >= 3 + MPI_Count type_lb; + MPI_Count type_extent; +#else + MPI_Aint type_lb; + MPI_Aint type_extent; +#endif + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(send_buf || send_count == 0); + if (allgather || (mpi_rank == root)) + HDassert(out_buf && out_buf_num_entries); + + /* Retrieve the extent of the MPI Datatype being used */ +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent_x(recv_type, &type_lb, &type_extent))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent(recv_type, &type_lb, &type_extent))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Type_get_extent(_x) failed", mpi_code) + + if (type_extent < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "MPI recv_type had a negative extent") + + /* + * Calculate the total size of the buffer being + * returned and allocate it + */ + if (allgather || (mpi_rank == root)) { + size_t i; + size_t buf_size; + + for (i = 0, recv_buf_num_entries = 0; i < (size_t)mpi_size; i++) + recv_buf_num_entries += (size_t)recv_counts[i]; + buf_size = recv_buf_num_entries * (size_t)type_extent; + + /* If our buffer size is 0, there's nothing to do */ + if (buf_size == 0) + HGOTO_DONE(SUCCEED) + + if (NULL == (recv_buf = H5MM_malloc(buf_size))) + /* Push an error, but still participate in collective gather operation */ + HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate receive buffer") + } + + /* Perform gather operation */ + if (allgather) { + if (MPI_SUCCESS != (mpi_code = MPI_Allgatherv(send_buf, send_count, send_type, recv_buf, recv_counts, + displacements, recv_type, comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allgatherv failed", mpi_code) + } + else { + if (MPI_SUCCESS != (mpi_code = MPI_Gatherv(send_buf, send_count, send_type, recv_buf, recv_counts, + displacements, recv_type, root, comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Gatherv failed", mpi_code) + } + + if (allgather || (mpi_rank == root)) { + *out_buf = recv_buf; + *out_buf_num_entries = recv_buf_num_entries; + } + +done: + if (ret_value < 0) { + if (recv_buf) + H5MM_free(recv_buf); + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5_mpio_gatherv_alloc() */ + +/*------------------------------------------------------------------------- + * Function: H5_mpio_gatherv_alloc_simple + * + * Purpose: A slightly simplified interface to H5_mpio_gatherv_alloc + * which calculates the receive counts and receive buffer + * displacements for the caller. + * + * Notes: This routine is collective across `comm`. + * + * Return: Non-negative on success/Negative on failure + * + *------------------------------------------------------------------------- + */ +herr_t +H5_mpio_gatherv_alloc_simple(void *send_buf, int send_count, MPI_Datatype send_type, MPI_Datatype recv_type, + hbool_t allgather, int root, MPI_Comm comm, int mpi_rank, int mpi_size, + void **out_buf, size_t *out_buf_num_entries) +{ + int * recv_counts_disps_array = NULL; + int mpi_code; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(send_buf || send_count == 0); + if (allgather || (mpi_rank == root)) + HDassert(out_buf && out_buf_num_entries); + + /* + * Allocate array to store the receive counts of each rank, as well as + * the displacements into the final array where each rank will place + * their data. The first half of the array contains the receive counts + * (in rank order), while the latter half contains the displacements + * (also in rank order). + */ + if (allgather || (mpi_rank == root)) { + if (NULL == + (recv_counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*recv_counts_disps_array)))) + /* Push an error, but still participate in collective gather operation */ + HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate receive counts and displacements array") + } + + /* Collect each rank's send count to interested ranks */ + if (allgather) { + if (MPI_SUCCESS != + (mpi_code = MPI_Allgather(&send_count, 1, MPI_INT, recv_counts_disps_array, 1, MPI_INT, comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code) + } + else { + if (MPI_SUCCESS != + (mpi_code = MPI_Gather(&send_count, 1, MPI_INT, recv_counts_disps_array, 1, MPI_INT, root, comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Gather failed", mpi_code) + } + + /* Set the displacements into the receive buffer for the gather operation */ + if (allgather || (mpi_rank == root)) { + size_t i; + int * displacements_ptr; + + displacements_ptr = &recv_counts_disps_array[mpi_size]; + + *displacements_ptr = 0; + for (i = 1; i < (size_t)mpi_size; i++) + displacements_ptr[i] = displacements_ptr[i - 1] + recv_counts_disps_array[i - 1]; + } + + /* Perform gather operation */ + if (H5_mpio_gatherv_alloc(send_buf, send_count, send_type, recv_counts_disps_array, + &recv_counts_disps_array[mpi_size], recv_type, allgather, root, comm, mpi_rank, + mpi_size, out_buf, out_buf_num_entries) < 0) + HGOTO_ERROR(H5E_LIB, H5E_CANTGATHER, FAIL, "can't gather data") + +done: + if (recv_counts_disps_array) + H5MM_free(recv_counts_disps_array); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5_mpio_gatherv_alloc_simple() */ + #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5private.h b/src/H5private.h index 68aabc2..d67163f 100644 --- a/src/H5private.h +++ b/src/H5private.h @@ -387,6 +387,25 @@ #define HSSIZET_MAX ((hssize_t)LLONG_MAX) #define HSSIZET_MIN (~(HSSIZET_MAX)) +#ifdef H5_HAVE_PARALLEL + +/* Define a type for safely sending size_t values with MPI */ +#if SIZE_MAX == UCHAR_MAX +#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_CHAR +#elif SIZE_MAX == USHRT_MAX +#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_SHORT +#elif SIZE_MAX == UINT_MAX +#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED +#elif SIZE_MAX == ULONG_MAX +#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_LONG +#elif SIZE_MAX == ULLONG_MAX +#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_LONG_LONG +#else +#error "no suitable MPI type for size_t" +#endif + +#endif /* H5_HAVE_PARALLEL */ + /* * Types and max sizes for POSIX I/O. * OS X (Darwin) is odd since the max I/O size does not match the types. @@ -508,6 +527,9 @@ #define H5_GCC_CLANG_DIAG_ON(x) #endif +/* Function pointer typedef for qsort */ +typedef int (*H5_sort_func_cb_t)(const void *, const void *); + /* Typedefs and functions for timing certain parts of the library. */ /* A set of elapsed/user/system times emitted as a time point by the @@ -2617,6 +2639,14 @@ H5_DLL herr_t H5_mpi_comm_cmp(MPI_Comm comm1, MPI_Comm comm2, int *result); H5_DLL herr_t H5_mpi_info_cmp(MPI_Info info1, MPI_Info info2, int *result); H5_DLL herr_t H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes, MPI_Datatype old_type, MPI_Datatype *new_type); +H5_DLL herr_t H5_mpio_gatherv_alloc(void *send_buf, int send_count, MPI_Datatype send_type, + const int recv_counts[], const int displacements[], + MPI_Datatype recv_type, hbool_t allgather, int root, MPI_Comm comm, + int mpi_rank, int mpi_size, void **out_buf, size_t *out_buf_num_entries); +H5_DLL herr_t H5_mpio_gatherv_alloc_simple(void *send_buf, int send_count, MPI_Datatype send_type, + MPI_Datatype recv_type, hbool_t allgather, int root, MPI_Comm comm, + int mpi_rank, int mpi_size, void **out_buf, + size_t *out_buf_num_entries); #endif /* H5_HAVE_PARALLEL */ /* Functions for debugging */ diff --git a/src/H5public.h b/src/H5public.h index 6a3911c..037501b 100644 --- a/src/H5public.h +++ b/src/H5public.h @@ -289,6 +289,11 @@ typedef long long ssize_t; * \internal Defined as a (minimum) 64-bit integer type. */ typedef uint64_t hsize_t; + +#ifdef H5_HAVE_PARALLEL +#define HSIZE_AS_MPI_TYPE MPI_UINT64_T +#endif + /** * The size of file objects. Used when negative values are needed to indicate errors. * @@ -323,7 +328,7 @@ typedef uint64_t haddr_t; #define HADDR_MAX (HADDR_UNDEF - 1) #ifdef H5_HAVE_PARALLEL -#define HADDR_AS_MPI_TYPE MPI_LONG_LONG_INT +#define HADDR_AS_MPI_TYPE MPI_UINT64_T #endif //! <!-- [H5_iter_order_t_snip] --> diff --git a/testpar/t_2Gio.c b/testpar/t_2Gio.c index 2be4ae4..911be2c 100644 --- a/testpar/t_2Gio.c +++ b/testpar/t_2Gio.c @@ -3047,7 +3047,7 @@ compress_readAll(void) nerrors++; } -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read); VRFY((ret >= 0), "H5Dwrite succeeded"); #endif @@ -3853,12 +3853,6 @@ actual_io_mode_tests(void) * TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL: * Test for Externl-File storage as the cause of breaking collective I/O. * - * TEST_FILTERS: - * Test for using filter (checksum) as the cause of breaking collective I/O. - * Note: TEST_FILTERS mode will not work until H5Dcreate and H5write is supported for mpio and filter - * feature. Use test_no_collective_cause_mode_filter() function instead. - * - * * Programmer: Jonathan Kim * Date: Aug, 2012 */ @@ -3898,9 +3892,6 @@ test_no_collective_cause_mode(int selection_mode) hid_t file_space = -1; hsize_t chunk_dims[MAX_RANK]; herr_t ret; -#ifdef LATER /* fletcher32 */ - H5Z_filter_t filter_info; -#endif /* LATER */ /* set to global value as default */ int l_facc_type = facc_type; char message[256]; @@ -3932,21 +3923,6 @@ test_no_collective_cause_mode(int selection_mode) is_chunked = 0; } -#ifdef LATER /* fletcher32 */ - if (selection_mode & TEST_FILTERS) { - ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32); - VRFY((ret >= 0), "Fletcher32 filter is available.\n"); - - ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, &filter_info); - VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) || - (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)), - "Fletcher32 filter encoding and decoding available.\n"); - - ret = H5Pset_fletcher32(dcpl); - VRFY((ret >= 0), "set filter (flecher32) succeeded"); - } -#endif /* LATER */ - if (selection_mode & TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES) { sid = H5Screate(H5S_NULL); VRFY((sid >= 0), "H5Screate_simple succeeded"); @@ -4022,14 +3998,6 @@ test_no_collective_cause_mode(int selection_mode) no_collective_cause_global_expected |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET; } -#ifdef LATER /* fletcher32 */ - if (selection_mode & TEST_FILTERS) { - test_name = "Broken Collective I/O - Filter is required"; - no_collective_cause_local_expected |= H5D_MPIO_FILTERS; - no_collective_cause_global_expected |= H5D_MPIO_FILTERS; - } -#endif /* LATER */ - if (selection_mode & TEST_COLLECTIVE) { test_name = "Broken Collective I/O - Not Broken"; no_collective_cause_local_expected = H5D_MPIO_COLLECTIVE; @@ -4166,240 +4134,6 @@ test_no_collective_cause_mode(int selection_mode) return; } -#if 0 -/* - * Function: test_no_collective_cause_mode_filter - * - * Purpose: - * Test specific for using filter as a caus of broken collective I/O and - * checks that the H5Pget_mpio_no_collective_cause properties in the DXPL - * have the correct values. - * - * NOTE: - * This is a temporary function. - * test_no_collective_cause_mode(TEST_FILTERS) will replace this when - * H5Dcreate and H5write support for mpio and filter feature. - * - * Input: - * TEST_FILTERS_READ: - * Test for using filter (checksum) as the cause of breaking collective I/O. - * - * Programmer: Jonathan Kim - * Date: Aug, 2012 - */ -static void -test_no_collective_cause_mode_filter(int selection_mode) -{ - uint32_t no_collective_cause_local_read = 0; - uint32_t no_collective_cause_local_expected = 0; - uint32_t no_collective_cause_global_read = 0; - uint32_t no_collective_cause_global_expected = 0; - - const char * filename; - const char * test_name; - hbool_t is_chunked=1; - int mpi_size = -1; - int mpi_rank = -1; - int length; - int * buffer; - int i; - MPI_Comm mpi_comm = MPI_COMM_NULL; - MPI_Info mpi_info = MPI_INFO_NULL; - hid_t fid = -1; - hid_t sid = -1; - hid_t dataset = -1; - hid_t data_type = H5T_NATIVE_INT; - hid_t fapl_write = -1; - hid_t fapl_read = -1; - hid_t dcpl = -1; - hid_t dxpl = -1; - hsize_t dims[MAX_RANK]; - hid_t mem_space = -1; - hid_t file_space = -1; - hsize_t chunk_dims[MAX_RANK]; - herr_t ret; -#ifdef LATER /* fletcher32 */ - H5Z_filter_t filter_info; -#endif /* LATER */ - char message[256]; - - /* Set up MPI parameters */ - MPI_Comm_size(test_comm, &mpi_size); - MPI_Comm_rank(test_comm, &mpi_rank); - - MPI_Barrier(test_comm); - - HDassert(mpi_size >= 1); - - mpi_comm = test_comm; - mpi_info = MPI_INFO_NULL; - - /* Create the dataset creation plist */ - dcpl = H5Pcreate(H5P_DATASET_CREATE); - VRFY((dcpl >= 0), "dataset creation plist created successfully"); - - if (selection_mode == TEST_FILTERS_READ ) { -#ifdef LATER /* fletcher32 */ - ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32); - VRFY ((ret >=0 ), "Fletcher32 filter is available.\n"); - - ret = H5Zget_filter_info (H5Z_FILTER_FLETCHER32, (unsigned int *) &filter_info); - VRFY ( ( (filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) || (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED) ) , "Fletcher32 filter encoding and decoding available.\n"); - - ret = H5Pset_fletcher32(dcpl); - VRFY((ret >= 0),"set filter (flecher32) succeeded"); -#endif /* LATER */ - } - else { - VRFY(0, "Unexpected mode, only test for TEST_FILTERS_READ."); - } - - /* Create the basic Space */ - dims[0] = dim0; - dims[1] = dim1; - sid = H5Screate_simple (MAX_RANK, dims, NULL); - VRFY((sid >= 0), "H5Screate_simple succeeded"); - - - filename = (const char *)GetTestParameters(); - HDassert(filename != NULL); - - /* Setup the file access template */ - fapl_write = create_faccess_plist(mpi_comm, mpi_info, FACC_DEFAULT); - VRFY((fapl_write >= 0), "create_faccess_plist() succeeded"); - - fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_write); - VRFY((fid >= 0), "H5Fcreate succeeded"); - - /* If we are not testing contiguous datasets */ - if(is_chunked) { - /* Set up chunk information. */ - chunk_dims[0] = dims[0]/mpi_size; - chunk_dims[1] = dims[1]; - ret = H5Pset_chunk(dcpl, 2, chunk_dims); - VRFY((ret >= 0),"chunk creation property list succeeded"); - } - - - /* Create the dataset */ - dataset = H5Dcreate2(fid, DSET_NOCOLCAUSE, data_type, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT); - VRFY((dataset >= 0), "H5Dcreate2() dataset succeeded"); - -#ifdef LATER /* fletcher32 */ - /* Set expected cause */ - test_name = "Broken Collective I/O - Filter is required"; - no_collective_cause_local_expected = H5D_MPIO_FILTERS; - no_collective_cause_global_expected = H5D_MPIO_FILTERS; -#endif /* LATER */ - - /* Get the file dataspace */ - file_space = H5Dget_space(dataset); - VRFY((file_space >= 0), "H5Dget_space succeeded"); - - /* Create the memory dataspace */ - mem_space = H5Screate_simple (MAX_RANK, dims, NULL); - VRFY((mem_space >= 0), "mem_space created"); - - /* Get the number of elements in the selection */ - length = dim0 * dim1; - - /* Allocate and initialize the buffer */ - buffer = (int *)HDmalloc(sizeof(int) * length); - VRFY((buffer != NULL), "HDmalloc of buffer succeeded"); - for(i = 0; i < length; i++) - buffer[i] = i; - - /* Set up the dxpl for the write */ - dxpl = H5Pcreate(H5P_DATASET_XFER); - VRFY((dxpl >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded"); - - if (selection_mode == TEST_FILTERS_READ) { - /* To test read in collective I/O mode , write in independent mode - * because write fails with mpio + filter */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - } - else { - /* To test write in collective I/O mode. */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - } - - - /* Write */ - ret = H5Dwrite(dataset, data_type, mem_space, file_space, dxpl, buffer); - - if(ret < 0) H5Eprint2(H5E_DEFAULT, stdout); - VRFY((ret >= 0), "H5Dwrite() dataset multichunk write succeeded"); - - - /* Make a copy of the dxpl to test the read operation */ - dxpl = H5Pcopy(dxpl); - VRFY((dxpl >= 0), "H5Pcopy succeeded"); - - if (dataset) - H5Dclose(dataset); - if (fapl_write) - H5Pclose(fapl_write); - if (fid) - H5Fclose(fid); - - - /*--------------------- - * Test Read access - *---------------------*/ - - /* Setup the file access template */ - fapl_read = create_faccess_plist(mpi_comm, mpi_info, facc_type); - VRFY((fapl_read >= 0), "create_faccess_plist() succeeded"); - - fid = H5Fopen (filename, H5F_ACC_RDONLY, fapl_read); - dataset = H5Dopen2 (fid, DSET_NOCOLCAUSE, H5P_DEFAULT); - - /* Set collective I/O properties in the dxpl. */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - - /* Read */ - ret = H5Dread(dataset, data_type, mem_space, file_space, dxpl, buffer); - - if(ret < 0) H5Eprint2(H5E_DEFAULT, stdout); - VRFY((ret >= 0), "H5Dread() dataset multichunk read succeeded"); - - /* Get the cause of broken collective I/O */ - ret = H5Pget_mpio_no_collective_cause (dxpl, &no_collective_cause_local_read, &no_collective_cause_global_read); - VRFY((ret >= 0), "retrieving no collective cause succeeded" ); - - /* Test values */ - HDmemset (message, 0, sizeof (message)); - HDsprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name); - VRFY((no_collective_cause_local_read == (uint32_t)no_collective_cause_local_expected), message); - HDmemset (message, 0, sizeof (message)); - HDsprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name); - VRFY((no_collective_cause_global_read == (uint32_t)no_collective_cause_global_expected), message); - - /* Release some resources */ - if (sid) - H5Sclose(sid); - if (fapl_read) - H5Pclose(fapl_read); - if (dcpl) - H5Pclose(dcpl); - if (dxpl) - H5Pclose(dxpl); - if (dataset) - H5Dclose(dataset); - if (mem_space) - H5Sclose(mem_space); - if (file_space) - H5Sclose(file_space); - if (fid) - H5Fclose(fid); - HDfree(buffer); - return; -} -#endif - /* Function: no_collective_cause_tests * * Purpose: Tests cases for broken collective IO. @@ -4420,13 +4154,6 @@ no_collective_cause_tests(void) test_no_collective_cause_mode(TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES); test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT); test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL); -#ifdef LATER /* fletcher32 */ - /* TODO: use this instead of below TEST_FILTERS_READ when H5Dcreate and - * H5Dwrite is ready for mpio + filter feature. - */ - /* test_no_collective_cause_mode (TEST_FILTERS); */ - test_no_collective_cause_mode_filter(TEST_FILTERS_READ); -#endif /* LATER */ /* * Test combined causes diff --git a/testpar/t_dset.c b/testpar/t_dset.c index 2aade32..8616bef 100644 --- a/testpar/t_dset.c +++ b/testpar/t_dset.c @@ -2605,7 +2605,7 @@ compress_readAll(void) nerrors++; } -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read); VRFY((ret >= 0), "H5Dwrite succeeded"); #endif @@ -3418,12 +3418,6 @@ actual_io_mode_tests(void) * TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL: * Test for Externl-File storage as the cause of breaking collective I/O. * - * TEST_FILTERS: - * Test for using filter (checksum) as the cause of breaking collective I/O. - * Note: TEST_FILTERS mode will not work until H5Dcreate and H5write is supported for mpio and filter - * feature. Use test_no_collective_cause_mode_filter() function instead. - * - * * Programmer: Jonathan Kim * Date: Aug, 2012 */ @@ -3465,9 +3459,6 @@ test_no_collective_cause_mode(int selection_mode) hid_t file_space = -1; hsize_t chunk_dims[RANK]; herr_t ret; -#ifdef LATER /* fletcher32 */ - H5Z_filter_t filter_info; -#endif /* LATER */ /* set to global value as default */ int l_facc_type = facc_type; char message[256]; @@ -3499,21 +3490,6 @@ test_no_collective_cause_mode(int selection_mode) is_chunked = 0; } -#ifdef LATER /* fletcher32 */ - if (selection_mode & TEST_FILTERS) { - ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32); - VRFY((ret >= 0), "Fletcher32 filter is available.\n"); - - ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, &filter_info); - VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) || - (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)), - "Fletcher32 filter encoding and decoding available.\n"); - - ret = H5Pset_fletcher32(dcpl); - VRFY((ret >= 0), "set filter (flecher32) succeeded"); - } -#endif /* LATER */ - if (selection_mode & TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES) { sid = H5Screate(H5S_NULL); VRFY((sid >= 0), "H5Screate_simple succeeded"); @@ -3589,14 +3565,6 @@ test_no_collective_cause_mode(int selection_mode) no_collective_cause_global_expected |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET; } -#ifdef LATER /* fletcher32 */ - if (selection_mode & TEST_FILTERS) { - test_name = "Broken Collective I/O - Filter is required"; - no_collective_cause_local_expected |= H5D_MPIO_FILTERS; - no_collective_cause_global_expected |= H5D_MPIO_FILTERS; - } -#endif /* LATER */ - if (selection_mode & TEST_COLLECTIVE) { test_name = "Broken Collective I/O - Not Broken"; no_collective_cause_local_expected = H5D_MPIO_COLLECTIVE; @@ -3735,242 +3703,6 @@ test_no_collective_cause_mode(int selection_mode) return; } -/* - * Function: test_no_collective_cause_mode_filter - * - * Purpose: - * Test specific for using filter as a caus of broken collective I/O and - * checks that the H5Pget_mpio_no_collective_cause properties in the DXPL - * have the correct values. - * - * NOTE: - * This is a temporary function. - * test_no_collective_cause_mode(TEST_FILTERS) will replace this when - * H5Dcreate and H5write support for mpio and filter feature. - * - * Input: - * TEST_FILTERS_READ: - * Test for using filter (checksum) as the cause of breaking collective I/O. - * - * Programmer: Jonathan Kim - * Date: Aug, 2012 - */ -#ifdef LATER -static void -test_no_collective_cause_mode_filter(int selection_mode) -{ - uint32_t no_collective_cause_local_read = 0; - uint32_t no_collective_cause_local_expected = 0; - uint32_t no_collective_cause_global_read = 0; - uint32_t no_collective_cause_global_expected = 0; - - const char *filename; - const char *test_name = "I/O"; - hbool_t is_chunked = 1; - int mpi_size = -1; - int mpi_rank = -1; - int length; - int * buffer; - int i; - MPI_Comm mpi_comm = MPI_COMM_NULL; - MPI_Info mpi_info = MPI_INFO_NULL; - hid_t fid = -1; - hid_t sid = -1; - hid_t dataset = -1; - hid_t data_type = H5T_NATIVE_INT; - hid_t fapl_write = -1; - hid_t fapl_read = -1; - hid_t dcpl = -1; - hid_t dxpl = -1; - hsize_t dims[RANK]; - hid_t mem_space = -1; - hid_t file_space = -1; - hsize_t chunk_dims[RANK]; - herr_t ret; -#ifdef LATER /* fletcher32 */ - H5Z_filter_t filter_info; -#endif /* LATER */ - char message[256]; - - /* Set up MPI parameters */ - MPI_Comm_size(MPI_COMM_WORLD, &mpi_size); - MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); - - MPI_Barrier(MPI_COMM_WORLD); - - HDassert(mpi_size >= 1); - - mpi_comm = MPI_COMM_WORLD; - mpi_info = MPI_INFO_NULL; - - /* Create the dataset creation plist */ - dcpl = H5Pcreate(H5P_DATASET_CREATE); - VRFY((dcpl >= 0), "dataset creation plist created successfully"); - - if (selection_mode == TEST_FILTERS_READ) { -#ifdef LATER /* fletcher32 */ - ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32); - VRFY((ret >= 0), "Fletcher32 filter is available.\n"); - - ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, (unsigned int *)&filter_info); - VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) || - (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)), - "Fletcher32 filter encoding and decoding available.\n"); - - ret = H5Pset_fletcher32(dcpl); - VRFY((ret >= 0), "set filter (flecher32) succeeded"); -#endif /* LATER */ - } - else { - VRFY(0, "Unexpected mode, only test for TEST_FILTERS_READ."); - } - - /* Create the basic Space */ - dims[0] = (hsize_t)dim0; - dims[1] = (hsize_t)dim1; - sid = H5Screate_simple(RANK, dims, NULL); - VRFY((sid >= 0), "H5Screate_simple succeeded"); - - filename = (const char *)GetTestParameters(); - HDassert(filename != NULL); - - /* Setup the file access template */ - fapl_write = create_faccess_plist(mpi_comm, mpi_info, FACC_DEFAULT); - VRFY((fapl_write >= 0), "create_faccess_plist() succeeded"); - - fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_write); - VRFY((fid >= 0), "H5Fcreate succeeded"); - - /* If we are not testing contiguous datasets */ - if (is_chunked) { - /* Set up chunk information. */ - chunk_dims[0] = dims[0] / (hsize_t)mpi_size; - chunk_dims[1] = dims[1]; - ret = H5Pset_chunk(dcpl, 2, chunk_dims); - VRFY((ret >= 0), "chunk creation property list succeeded"); - } - - /* Create the dataset */ - dataset = H5Dcreate2(fid, DSET_NOCOLCAUSE, data_type, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT); - VRFY((dataset >= 0), "H5Dcreate2() dataset succeeded"); - -#ifdef LATER /* fletcher32 */ - /* Set expected cause */ - test_name = "Broken Collective I/O - Filter is required"; - no_collective_cause_local_expected = H5D_MPIO_FILTERS; - no_collective_cause_global_expected = H5D_MPIO_FILTERS; -#endif /* LATER */ - - /* Get the file dataspace */ - file_space = H5Dget_space(dataset); - VRFY((file_space >= 0), "H5Dget_space succeeded"); - - /* Create the memory dataspace */ - mem_space = H5Screate_simple(RANK, dims, NULL); - VRFY((mem_space >= 0), "mem_space created"); - - /* Get the number of elements in the selection */ - length = dim0 * dim1; - - /* Allocate and initialize the buffer */ - buffer = (int *)HDmalloc(sizeof(int) * length); - VRFY((buffer != NULL), "HDmalloc of buffer succeeded"); - for (i = 0; i < length; i++) - buffer[i] = i; - - /* Set up the dxpl for the write */ - dxpl = H5Pcreate(H5P_DATASET_XFER); - VRFY((dxpl >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded"); - - if (selection_mode == TEST_FILTERS_READ) { - /* To test read in collective I/O mode , write in independent mode - * because write fails with mpio + filter */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - } - else { - /* To test write in collective I/O mode. */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - } - - /* Write */ - ret = H5Dwrite(dataset, data_type, mem_space, file_space, dxpl, buffer); - - if (ret < 0) - H5Eprint2(H5E_DEFAULT, stdout); - VRFY((ret >= 0), "H5Dwrite() dataset multichunk write succeeded"); - - /* Make a copy of the dxpl to test the read operation */ - dxpl = H5Pcopy(dxpl); - VRFY((dxpl >= 0), "H5Pcopy succeeded"); - - if (dataset) - H5Dclose(dataset); - if (fapl_write) - H5Pclose(fapl_write); - if (fid) - H5Fclose(fid); - - /*--------------------- - * Test Read access - *---------------------*/ - - /* Setup the file access template */ - fapl_read = create_faccess_plist(mpi_comm, mpi_info, facc_type); - VRFY((fapl_read >= 0), "create_faccess_plist() succeeded"); - - fid = H5Fopen(filename, H5F_ACC_RDONLY, fapl_read); - dataset = H5Dopen2(fid, DSET_NOCOLCAUSE, H5P_DEFAULT); - - /* Set collective I/O properties in the dxpl. */ - ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE); - VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); - - /* Read */ - ret = H5Dread(dataset, data_type, mem_space, file_space, dxpl, buffer); - - if (ret < 0) - H5Eprint2(H5E_DEFAULT, stdout); - VRFY((ret >= 0), "H5Dread() dataset multichunk read succeeded"); - - /* Get the cause of broken collective I/O */ - ret = H5Pget_mpio_no_collective_cause(dxpl, &no_collective_cause_local_read, - &no_collective_cause_global_read); - VRFY((ret >= 0), "retrieving no collective cause succeeded"); - - /* Test values */ - HDmemset(message, 0, sizeof(message)); - HDsnprintf(message, sizeof(message), - "Local cause of Broken Collective I/O has the correct value for %s.\n", test_name); - VRFY((no_collective_cause_local_read == (uint32_t)no_collective_cause_local_expected), message); - HDmemset(message, 0, sizeof(message)); - HDsnprintf(message, sizeof(message), - "Global cause of Broken Collective I/O has the correct value for %s.\n", test_name); - VRFY((no_collective_cause_global_read == (uint32_t)no_collective_cause_global_expected), message); - - /* Release some resources */ - if (sid) - H5Sclose(sid); - if (fapl_read) - H5Pclose(fapl_read); - if (dcpl) - H5Pclose(dcpl); - if (dxpl) - H5Pclose(dxpl); - if (dataset) - H5Dclose(dataset); - if (mem_space) - H5Sclose(mem_space); - if (file_space) - H5Sclose(file_space); - if (fid) - H5Fclose(fid); - HDfree(buffer); - return; -} -#endif - /* Function: no_collective_cause_tests * * Purpose: Tests cases for broken collective IO. @@ -3991,13 +3723,6 @@ no_collective_cause_tests(void) test_no_collective_cause_mode(TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES); test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT); test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL); -#ifdef LATER /* fletcher32 */ - /* TODO: use this instead of below TEST_FILTERS_READ when H5Dcreate and - * H5Dwrite is ready for mpio + filter feature. - */ - /* test_no_collective_cause_mode (TEST_FILTERS); */ - test_no_collective_cause_mode_filter(TEST_FILTERS_READ); -#endif /* LATER */ /* * Test combined causes diff --git a/testpar/t_filters_parallel.c b/testpar/t_filters_parallel.c index 78af0fb..8a55519 100644 --- a/testpar/t_filters_parallel.c +++ b/testpar/t_filters_parallel.c @@ -26,73 +26,139 @@ const char *FILENAME[] = {"t_filters_parallel", NULL}; char filenames[1][256]; +static MPI_Comm comm = MPI_COMM_WORLD; +static MPI_Info info = MPI_INFO_NULL; +static int mpi_rank; +static int mpi_size; + int nerrors = 0; -size_t cur_filter_idx = 0; -#define GZIP_INDEX 0 -#define FLETCHER32_INDEX 1 +/* Arrays of filter ID values and filter names (should match each other) */ +H5Z_filter_t filterIDs[] = { + H5Z_FILTER_DEFLATE, H5Z_FILTER_SHUFFLE, H5Z_FILTER_FLETCHER32, + H5Z_FILTER_SZIP, H5Z_FILTER_NBIT, H5Z_FILTER_SCALEOFFSET, +}; + +const char *filterNames[] = {"Deflate", "Shuffle", "Fletcher32", "SZIP", "Nbit", "ScaleOffset"}; -#define ARRAY_SIZE(a) sizeof(a) / sizeof(a[0]) +/* Function pointer typedef for test functions */ +typedef void (*test_func)(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id); + +/* Typedef for filter arguments for user-defined filters */ +typedef struct filter_options_t { + unsigned int flags; + size_t cd_nelmts; + const unsigned int cd_values[]; +} filter_options_t; /* - * Used to check if a filter is available before running a test. + * Enum for verify_space_alloc_status which specifies + * how many chunks have been written to in a dataset */ -#define CHECK_CUR_FILTER_AVAIL() \ - { \ - htri_t filter_is_avail; \ - \ - if (cur_filter_idx == GZIP_INDEX) { \ - if ((filter_is_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE)) != TRUE) { \ - if (MAINPROCESS) { \ - HDputs(" - SKIPPED - Deflate filter not available"); \ - } \ - return; \ - } \ - } \ - } +typedef enum num_chunks_written_t { + DATASET_JUST_CREATED, + NO_CHUNKS_WRITTEN, + SOME_CHUNKS_WRITTEN, + ALL_CHUNKS_WRITTEN +} num_chunks_written_t; -static herr_t set_dcpl_filter(hid_t dcpl); +static herr_t set_dcpl_filter(hid_t dcpl_id, H5Z_filter_t filter_id, filter_options_t *filter_options); +static herr_t verify_space_alloc_status(hid_t dset_id, hid_t dcpl_id, num_chunks_written_t chunks_written); -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES /* Tests for writing data in parallel */ -static void test_write_one_chunk_filtered_dataset(void); -static void test_write_filtered_dataset_no_overlap(void); -static void test_write_filtered_dataset_overlap(void); -static void test_write_filtered_dataset_single_no_selection(void); -static void test_write_filtered_dataset_all_no_selection(void); -static void test_write_filtered_dataset_point_selection(void); -static void test_write_filtered_dataset_interleaved_write(void); -static void test_write_transformed_filtered_dataset_no_overlap(void); -static void test_write_3d_filtered_dataset_no_overlap_separate_pages(void); -static void test_write_3d_filtered_dataset_no_overlap_same_pages(void); -static void test_write_3d_filtered_dataset_overlap(void); -static void test_write_cmpd_filtered_dataset_no_conversion_unshared(void); -static void test_write_cmpd_filtered_dataset_no_conversion_shared(void); -static void test_write_cmpd_filtered_dataset_type_conversion_unshared(void); -static void test_write_cmpd_filtered_dataset_type_conversion_shared(void); +static void test_write_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_no_overlap_partial(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_single_unlim_dim_no_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_single_unlim_dim_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_multi_unlim_dim_no_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_multi_unlim_dim_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_filtered_dataset_interleaved_write(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_transformed_filtered_dataset_no_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_3d_filtered_dataset_no_overlap_separate_pages(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_3d_filtered_dataset_no_overlap_same_pages(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_write_cmpd_filtered_dataset_no_conversion_unshared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_cmpd_filtered_dataset_no_conversion_shared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_cmpd_filtered_dataset_type_conversion_unshared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_write_cmpd_filtered_dataset_type_conversion_shared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); #endif /* Tests for reading data in parallel */ -static void test_read_one_chunk_filtered_dataset(void); -static void test_read_filtered_dataset_no_overlap(void); -static void test_read_filtered_dataset_overlap(void); -static void test_read_filtered_dataset_single_no_selection(void); -static void test_read_filtered_dataset_all_no_selection(void); -static void test_read_filtered_dataset_point_selection(void); -static void test_read_filtered_dataset_interleaved_read(void); -static void test_read_transformed_filtered_dataset_no_overlap(void); -static void test_read_3d_filtered_dataset_no_overlap_separate_pages(void); -static void test_read_3d_filtered_dataset_no_overlap_same_pages(void); -static void test_read_3d_filtered_dataset_overlap(void); -static void test_read_cmpd_filtered_dataset_no_conversion_unshared(void); -static void test_read_cmpd_filtered_dataset_no_conversion_shared(void); -static void test_read_cmpd_filtered_dataset_type_conversion_unshared(void); -static void test_read_cmpd_filtered_dataset_type_conversion_shared(void); - -#if MPI_VERSION >= 3 -/* Other miscellaneous tests */ -static void test_shrinking_growing_chunks(void); -#endif +static void test_read_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_filtered_dataset_interleaved_read(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_transformed_filtered_dataset_no_overlap(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_3d_filtered_dataset_no_overlap_separate_pages(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_3d_filtered_dataset_no_overlap_same_pages(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id); +static void test_read_cmpd_filtered_dataset_no_conversion_unshared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_cmpd_filtered_dataset_no_conversion_shared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_cmpd_filtered_dataset_type_conversion_unshared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_read_cmpd_filtered_dataset_type_conversion_shared(const char * parent_group, + H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); /* * Tests for attempting to round-trip the data going from @@ -103,21 +169,40 @@ static void test_shrinking_growing_chunks(void); * * written in parallel -> read serially */ -static void test_write_serial_read_parallel(void); -#if MPI_VERSION >= 3 -static void test_write_parallel_read_serial(void); -#endif +static void test_write_serial_read_parallel(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); -static MPI_Comm comm = MPI_COMM_WORLD; -static MPI_Info info = MPI_INFO_NULL; -static int mpi_rank; -static int mpi_size; +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES +static void test_write_parallel_read_serial(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); -static void (*tests[])(void) = { -#if MPI_VERSION >= 3 +/* Other miscellaneous tests */ +static void test_shrinking_growing_chunks(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_edge_chunks_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_edge_chunks_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_edge_chunks_partial_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_fill_values(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id); +static void test_fill_value_undefined(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +static void test_fill_time_never(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id); +#endif + +static test_func tests[] = { +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES test_write_one_chunk_filtered_dataset, test_write_filtered_dataset_no_overlap, + test_write_filtered_dataset_no_overlap_partial, test_write_filtered_dataset_overlap, + test_write_filtered_dataset_single_unlim_dim_no_overlap, + test_write_filtered_dataset_single_unlim_dim_overlap, + test_write_filtered_dataset_multi_unlim_dim_no_overlap, + test_write_filtered_dataset_multi_unlim_dim_overlap, test_write_filtered_dataset_single_no_selection, test_write_filtered_dataset_all_no_selection, test_write_filtered_dataset_point_selection, @@ -147,33 +232,168 @@ static void (*tests[])(void) = { test_read_cmpd_filtered_dataset_type_conversion_unshared, test_read_cmpd_filtered_dataset_type_conversion_shared, test_write_serial_read_parallel, -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES test_write_parallel_read_serial, test_shrinking_growing_chunks, + test_edge_chunks_no_overlap, + test_edge_chunks_overlap, + test_edge_chunks_partial_write, + test_fill_values, + test_fill_value_undefined, + test_fill_time_never, #endif }; /* * Function to call the appropriate HDF5 filter-setting function - * depending on the currently set index. Used to re-run the tests + * depending on the given filter ID. Used to re-run the tests * with different filters to check that the data still comes back * correctly under a variety of circumstances, such as the * Fletcher32 checksum filter increasing the size of the chunk. */ static herr_t -set_dcpl_filter(hid_t dcpl) +set_dcpl_filter(hid_t dcpl_id, H5Z_filter_t filter_id, filter_options_t *filter_options) +{ + switch (filter_id) { + case H5Z_FILTER_DEFLATE: + return H5Pset_deflate(dcpl_id, DEFAULT_DEFLATE_LEVEL); + case H5Z_FILTER_SHUFFLE: + return H5Pset_shuffle(dcpl_id); + case H5Z_FILTER_FLETCHER32: + return H5Pset_fletcher32(dcpl_id); + case H5Z_FILTER_SZIP: { + unsigned pixels_per_block = H5_SZIP_MAX_PIXELS_PER_BLOCK; + hsize_t chunk_dims[H5S_MAX_RANK] = {0}; + size_t i, chunk_nelemts; + + VRFY(H5Pget_chunk(dcpl_id, H5S_MAX_RANK, chunk_dims) >= 0, "H5Pget_chunk succeeded"); + + for (i = 0, chunk_nelemts = 1; i < H5S_MAX_RANK; i++) + if (chunk_dims[i] > 0) + chunk_nelemts *= chunk_dims[i]; + + if (chunk_nelemts < H5_SZIP_MAX_PIXELS_PER_BLOCK) { + /* + * Can't set SZIP for chunk of 1 data element. + * Pixels-per-block value must be both even + * and non-zero. + */ + if (chunk_nelemts == 1) + return SUCCEED; + + if ((chunk_nelemts % 2) == 0) + pixels_per_block = (unsigned)chunk_nelemts; + else + pixels_per_block = (unsigned)(chunk_nelemts - 1); + } + else + pixels_per_block = H5_SZIP_MAX_PIXELS_PER_BLOCK; + + return H5Pset_szip(dcpl_id, 0, pixels_per_block); + } + case H5Z_FILTER_NBIT: + return H5Pset_nbit(dcpl_id); + case H5Z_FILTER_SCALEOFFSET: + return H5Pset_scaleoffset(dcpl_id, H5Z_SO_INT, 0); + default: { + if (!filter_options) + return FAIL; + + return H5Pset_filter(dcpl_id, filter_id, filter_options->flags, filter_options->cd_nelmts, + filter_options->cd_values); + } + } +} + +/* + * Function to verify the status of dataset storage space allocation + * based on the dataset's allocation time setting and how many chunks + * in the dataset have been written to. + */ +static herr_t +verify_space_alloc_status(hid_t dset_id, hid_t dcpl_id, num_chunks_written_t chunks_written) { - switch (cur_filter_idx) { - case GZIP_INDEX: - return H5Pset_deflate(dcpl, DEFAULT_DEFLATE_LEVEL); - case FLETCHER32_INDEX: - return H5Pset_fletcher32(dcpl); - default: - return H5Pset_deflate(dcpl, DEFAULT_DEFLATE_LEVEL); + int nfilters; + herr_t ret_value = SUCCEED; + + VRFY(((nfilters = H5Pget_nfilters(dcpl_id)) >= 0), "H5Pget_nfilters succeeded"); + + /* + * Only verify space allocation status when there are filters + * in the dataset's filter pipeline. When filters aren't in the + * pipeline, the space allocation time and status can vary based + * on whether the file was created in parallel or serial mode. + */ + if (nfilters > 0) { + H5D_space_status_t space_status; + H5D_alloc_time_t alloc_time; + + VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded"); + VRFY((H5Dget_space_status(dset_id, &space_status) >= 0), "H5Dget_space_status succeeded"); + + switch (alloc_time) { + case H5D_ALLOC_TIME_EARLY: + /* + * Early space allocation should always result in the + * full dataset storage space being allocated. + */ + VRFY(space_status == H5D_SPACE_STATUS_ALLOCATED, "verified space allocation status"); + break; + case H5D_ALLOC_TIME_LATE: + /* + * Late space allocation should always result in the + * full dataset storage space being allocated when + * the dataset gets written to. However, if the dataset + * is extended the dataset's space allocation status + * can become partly allocated until the dataset is + * written to again. + */ + if (chunks_written == SOME_CHUNKS_WRITTEN || chunks_written == ALL_CHUNKS_WRITTEN) + VRFY((space_status == H5D_SPACE_STATUS_ALLOCATED) || + (space_status == H5D_SPACE_STATUS_PART_ALLOCATED), + "verified space allocation status"); + else if (chunks_written == NO_CHUNKS_WRITTEN) + /* + * A special case where we wrote to a dataset that + * uses late space allocation, but the write was + * either a no-op (no selection in the dataset + * from any rank) or something caused the write to + * fail late in the process of performing the actual + * write. In either case, space should still have + * been allocated. + */ + VRFY(space_status == H5D_SPACE_STATUS_ALLOCATED, "verified space allocation status"); + else + VRFY(space_status == H5D_SPACE_STATUS_NOT_ALLOCATED, "verified space allocation status"); + break; + case H5D_ALLOC_TIME_DEFAULT: + case H5D_ALLOC_TIME_INCR: + /* + * Incremental space allocation should result in + * the dataset's storage space being incrementally + * allocated as chunks are written to. Once all chunks + * have been written to, the space allocation should be + * seen as fully allocated. + */ + if (chunks_written == SOME_CHUNKS_WRITTEN) + VRFY((space_status == H5D_SPACE_STATUS_PART_ALLOCATED), + "verified space allocation status"); + else if (chunks_written == ALL_CHUNKS_WRITTEN) + VRFY((space_status == H5D_SPACE_STATUS_ALLOCATED), "verified space allocation status"); + else + VRFY(space_status == H5D_SPACE_STATUS_NOT_ALLOCATED, "verified space allocation status"); + break; + default: + if (MAINPROCESS) + MESG("unknown space allocation time"); + MPI_Abort(MPI_COMM_WORLD, 1); + } } + + return ret_value; } -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES /* * Tests parallel write of filtered data in the special * case where a dataset is composed of a single chunk. @@ -182,7 +402,8 @@ set_dcpl_filter(hid_t dcpl) * 02/01/2017 */ static void -test_write_one_chunk_filtered_dataset(void) +test_write_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -195,26 +416,18 @@ test_write_one_chunk_filtered_dataset(void) hsize_t count[WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS]; hsize_t block[WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to one-chunk filtered dataset"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_ONE_CHUNK_FILTERED_DATASET_NROWS; @@ -231,19 +444,21 @@ test_write_one_chunk_filtered_dataset(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -293,15 +508,12 @@ test_write_one_chunk_filtered_dataset(void) ((C_DATATYPE)i / (WRITE_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size * WRITE_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS)); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -311,10 +523,10 @@ test_write_one_chunk_filtered_dataset(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -324,10 +536,11 @@ test_write_one_chunk_filtered_dataset(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -343,7 +556,8 @@ test_write_one_chunk_filtered_dataset(void) * 02/01/2017 */ static void -test_write_filtered_dataset_no_overlap(void) +test_write_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -356,27 +570,18 @@ test_write_filtered_dataset_no_overlap(void) hsize_t count[WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t block[WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_NROWS; @@ -393,20 +598,22 @@ test_write_filtered_dataset_no_overlap(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -454,15 +661,168 @@ test_write_filtered_dataset_no_overlap(void) correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) + (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1]))); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + if (data) + HDfree(data); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify the correct data was written */ + read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); + + if (correct_buf) + HDfree(correct_buf); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests parallel write of filtered data in the case where only + * one process is writing to a particular chunk in the operation + * and that process only writes to part of a chunk. + */ +static void +test_write_filtered_dataset_no_overlap_partial(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + C_DATATYPE *correct_buf = NULL; + hsize_t dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t start[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t stride[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t count[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + hsize_t block[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS]; + size_t i, data_size, correct_buf_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing partial write to unshared filtered chunks"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NROWS; + dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS; + chunk_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS; + sel_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS; + sel_dims[1] = (hsize_t)(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS / + WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS); + + filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = 1; + count[1] = (hsize_t)(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS / + WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS); + stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS; + stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS; + block[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS; + block[1] = (hsize_t)1; + start[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS * count[0]); + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); + VRFY((NULL != correct_buf), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + for (i = 0; i < (size_t)mpi_size; i++) { + size_t rank_n_elems = (size_t)(mpi_size * (WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS * + WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS)); + size_t data_idx = i; + + for (size_t j = 0; j < rank_n_elems; j++) { + if ((j % WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS) == 0) { + correct_buf[(i * rank_n_elems) + j] = (C_DATATYPE)data_idx; + data_idx++; + } + } + } + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -472,10 +832,10 @@ test_write_filtered_dataset_no_overlap(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -485,10 +845,10 @@ test_write_filtered_dataset_no_overlap(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); - VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -505,7 +865,8 @@ test_write_filtered_dataset_no_overlap(void) * 02/01/2017 */ static void -test_write_filtered_dataset_overlap(void) +test_write_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -518,27 +879,18 @@ test_write_filtered_dataset_overlap(void) hsize_t count[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t block[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to shared filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NROWS; @@ -555,20 +907,22 @@ test_write_filtered_dataset_overlap(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -616,15 +970,12 @@ test_write_filtered_dataset_overlap(void) (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) + (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1])); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -634,10 +985,10 @@ test_write_filtered_dataset_overlap(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -647,10 +998,650 @@ test_write_filtered_dataset_overlap(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests parallel write of filtered data in the case where + * a dataset has a single unlimited dimension and each + * MPI rank writes to its own separate chunk. On each + * iteration, the dataset is extended in its extensible + * dimension by "MPI size" chunks per rank and the new + * chunks are written to, read back and verified. + */ +static void +test_write_filtered_dataset_single_unlim_dim_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t max_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t sel_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t start[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t stride[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t count[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t block[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to unshared filtered chunks w/ single unlimited dimension"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS; + dataset_dims[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS; + max_dims[0] = dataset_dims[0]; + max_dims[1] = H5S_UNLIMITED; + chunk_dims[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS; + sel_dims[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS; + sel_dims[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS; + + filespace = H5Screate_simple(WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + read_buf = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + for (i = 0; i < (size_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS; i++) { + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = 1; + count[1] = + (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS / (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS; + stride[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS; + stride[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS; + block[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS; + block[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS; + start[0] = ((hsize_t)mpi_rank * block[0] * count[0]); + start[1] = i * count[1] * block[1]; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], + block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* Verify the correct data was written */ + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (i < (size_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS - 1) { + /* Extend the dataset by count[1] chunks in the extensible dimension */ + dataset_dims[1] += count[1] * block[1]; + VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + } + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + } + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests parallel write of filtered data in the case where + * a dataset has a single unlimited dimension and each + * MPI rank writes to a portion of each chunk in the dataset. + * On each iteration, the dataset is extended in its extensible + * dimension by two chunks and the new chunks are written to + * by all ranks, then read back and verified. + */ +static void +test_write_filtered_dataset_single_unlim_dim_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t max_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t sel_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t start[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t stride[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t count[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + hsize_t block[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to shared filtered chunks w/ single unlimited dimension"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NROWS; + dataset_dims[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NCOLS; + max_dims[0] = dataset_dims[0]; + max_dims[1] = H5S_UNLIMITED; + chunk_dims[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR; + + filespace = H5Screate_simple(WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + read_buf = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + for (i = 0; i < (size_t)WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS; i++) { + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NROWS / (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS; + count[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NCOLS / (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS; + stride[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS; + stride[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS; + block[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS / (hsize_t)mpi_size; + block[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS; + start[0] = (hsize_t)mpi_rank * block[0]; + start[1] = i * count[1] * block[1]; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], + block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* Verify correct data was written */ + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (i < (size_t)WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS - 1) { + /* Extend the dataset by count[1] chunks in the extensible dimension */ + dataset_dims[1] += count[1] * block[1]; + VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + } + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + } + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests parallel write of filtered data in the case where + * a dataset has two unlimited dimensions and each + * MPI rank writes to its own separate chunks. On each + * iteration, the dataset is extended in its first + * extensible dimension by the size of one chunk per rank + * and in its second extensible dimension by the size of + * one chunk. Then, all chunks are written to, read back + * and verified. + */ +static void +test_write_filtered_dataset_multi_unlim_dim_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t max_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t sel_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t start[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t stride[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t count[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t block[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to unshared filtered chunks w/ two unlimited dimensions"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NROWS; + dataset_dims[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS; + max_dims[0] = H5S_UNLIMITED; + max_dims[1] = H5S_UNLIMITED; + chunk_dims[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS; + sel_dims[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS; + sel_dims[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS; + + filespace = H5Screate_simple(WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + for (i = 0; i < (size_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS; i++) { + C_DATATYPE *tmp_realloc = NULL; + size_t j; + + /* Set selected dimensions */ + sel_dims[0] = (i + 1) * WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS; + sel_dims[1] = (i + 1) * WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS; + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + tmp_realloc = (C_DATATYPE *)HDrealloc(data, data_size); + VRFY((NULL != tmp_realloc), "HDrealloc succeeded"); + data = tmp_realloc; + + tmp_realloc = (C_DATATYPE *)HDrealloc(read_buf, data_size); + VRFY((NULL != tmp_realloc), "HDrealloc succeeded"); + read_buf = tmp_realloc; + + for (j = 0; j < data_size / sizeof(*data); j++) + data[j] = (C_DATATYPE)GEN_DATA(j); + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (i + 1); + count[1] = (i + 1); + stride[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS; + stride[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS; + block[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS; + block[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS; + start[0] = ((hsize_t)mpi_rank * block[0] * count[0]); + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], + block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* Verify the correct data was written */ + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (i < (size_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS - 1) { + /* + * Extend the dataset by the size of one chunk per rank + * in the first extensible dimension. Extend the dataset + * by the size of chunk in the second extensible dimension. + */ + dataset_dims[0] += (hsize_t)mpi_size * block[0]; + dataset_dims[1] += block[1]; + VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + } + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + } + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests parallel write of filtered data in the case where + * a dataset has two unlimited dimensions and each MPI + * rank writes to a portion of each chunk in the dataset. + * On each iteration, the dataset is extended in its extensible + * dimensions by the size of a chunk and then all chunks are + * written to by all ranks, then read back and verified. + */ +static void +test_write_filtered_dataset_multi_unlim_dim_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t max_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t sel_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t start[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t stride[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t count[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + hsize_t block[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to shared filtered chunks w/ two unlimited dimensions"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_NROWS; + dataset_dims[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_NCOLS; + max_dims[0] = H5S_UNLIMITED; + max_dims[1] = H5S_UNLIMITED; + chunk_dims[0] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR; + + filespace = H5Screate_simple(WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + for (i = 0; i < (size_t)WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS; i++) { + C_DATATYPE *tmp_realloc = NULL; + size_t j; + + /* Set selected dimensions */ + sel_dims[0] = (i + 1); + sel_dims[1] = (i + 1) * (size_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS; + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + tmp_realloc = (C_DATATYPE *)HDrealloc(data, data_size); + VRFY((NULL != tmp_realloc), "HDrealloc succeeded"); + data = tmp_realloc; + + tmp_realloc = (C_DATATYPE *)HDrealloc(read_buf, data_size); + VRFY((NULL != tmp_realloc), "HDrealloc succeeded"); + read_buf = tmp_realloc; + + for (j = 0; j < data_size / sizeof(*data); j++) + data[j] = (C_DATATYPE)GEN_DATA(j); + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (i + 1); + count[1] = (i + 1); + stride[0] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS; + stride[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS; + block[0] = 1; + block[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS; + start[0] = (hsize_t)mpi_rank; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], + block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* Verify correct data was written */ + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (i < (size_t)WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS - 1) { + /* Extend the dataset by the size of a chunk in each extensible dimension */ + dataset_dims[0] += (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS; + dataset_dims[1] += (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS; + VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + } + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + } + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -669,7 +1660,8 @@ test_write_filtered_dataset_overlap(void) * 02/01/2017 */ static void -test_write_filtered_dataset_single_no_selection(void) +test_write_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -683,27 +1675,18 @@ test_write_filtered_dataset_single_no_selection(void) hsize_t block[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, data_size, correct_buf_size; size_t segment_length; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to filtered chunks with a single process having no selection"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS; @@ -723,20 +1706,22 @@ test_write_filtered_dataset_single_no_selection(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -774,15 +1759,17 @@ test_write_filtered_dataset_single_no_selection(void) data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf); - data = (C_DATATYPE *)HDcalloc(1, data_size); - VRFY((NULL != data), "HDcalloc succeeded"); + if (mpi_rank != WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC) { + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + } correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != correct_buf), "HDcalloc succeeded"); - for (i = 0; i < data_size / sizeof(*data); i++) - data[i] = (C_DATATYPE)GEN_DATA(i); - for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) + (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1]))); @@ -793,15 +1780,12 @@ test_write_filtered_dataset_single_no_selection(void) ((size_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC * segment_length), 0, segment_length * sizeof(*data)); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status - data should only have been written if MPI size > 1 */ + verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1 ? SOME_CHUNKS_WRITTEN : NO_CHUNKS_WRITTEN)); + if (data) HDfree(data); @@ -811,10 +1795,10 @@ test_write_filtered_dataset_single_no_selection(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -824,10 +1808,11 @@ test_write_filtered_dataset_single_no_selection(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -847,7 +1832,8 @@ test_write_filtered_dataset_single_no_selection(void) * 02/02/2017 */ static void -test_write_filtered_dataset_all_no_selection(void) +test_write_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -856,27 +1842,18 @@ test_write_filtered_dataset_all_no_selection(void) hsize_t chunk_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t sel_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to filtered chunks with all processes having no selection"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS; @@ -892,20 +1869,22 @@ test_write_filtered_dataset_all_no_selection(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); filespace = H5Dget_space(dset_id); @@ -926,15 +1905,12 @@ test_write_filtered_dataset_all_no_selection(void) for (i = 0; i < data_size / sizeof(*data); i++) data[i] = (C_DATATYPE)GEN_DATA(i); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status - no ranks should have written any data */ + verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -944,10 +1920,10 @@ test_write_filtered_dataset_all_no_selection(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -957,10 +1933,11 @@ test_write_filtered_dataset_all_no_selection(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -974,7 +1951,8 @@ test_write_filtered_dataset_all_no_selection(void) * 02/02/2017 */ static void -test_write_filtered_dataset_point_selection(void) +test_write_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *correct_buf = NULL; @@ -985,27 +1963,18 @@ test_write_filtered_dataset_point_selection(void) hsize_t sel_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, j, data_size, correct_buf_size; size_t num_points; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to filtered chunks with point selection"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS; @@ -1022,20 +1991,22 @@ test_write_filtered_dataset_point_selection(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Set up point selection */ @@ -1075,15 +2046,12 @@ test_write_filtered_dataset_point_selection(void) (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) + (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1])); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -1093,10 +2061,10 @@ test_write_filtered_dataset_point_selection(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -1108,10 +2076,11 @@ test_write_filtered_dataset_point_selection(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1129,7 +2098,8 @@ test_write_filtered_dataset_point_selection(void) * 02/02/2017 */ static void -test_write_filtered_dataset_interleaved_write(void) +test_write_filtered_dataset_interleaved_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -1142,27 +2112,18 @@ test_write_filtered_dataset_interleaved_write(void) hsize_t count[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS]; hsize_t block[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing interleaved write to filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NROWS; @@ -1179,20 +2140,22 @@ test_write_filtered_dataset_interleaved_write(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -1251,15 +2214,12 @@ test_write_filtered_dataset_interleaved_write(void) + ((hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS * (i / (hsize_t)(mpi_size * INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)))); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -1269,10 +2229,10 @@ test_write_filtered_dataset_interleaved_write(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" INTERLEAVED_WRITE_FILTERED_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -1282,10 +2242,11 @@ test_write_filtered_dataset_interleaved_write(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1308,7 +2269,8 @@ test_write_filtered_dataset_interleaved_write(void) * 08/20/2021 */ static void -test_write_transformed_filtered_dataset_no_overlap(void) +test_write_transformed_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -1321,27 +2283,18 @@ test_write_transformed_filtered_dataset_no_overlap(void) hsize_t count[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t block[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared transformed and filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS; @@ -1358,19 +2311,22 @@ test_write_transformed_filtered_dataset_no_overlap(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, - filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, + HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); @@ -1419,11 +2375,9 @@ test_write_transformed_filtered_dataset_no_overlap(void) correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) + (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1]))); - /* Create property list for collective dataset write and data transform */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); + /* Create property list for data transform */ + plist_id = H5Pcopy(dxpl_id); + VRFY((plist_id >= 0), "DXPL copy succeeded"); /* Set data transform expression */ VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded"); @@ -1440,7 +2394,7 @@ test_write_transformed_filtered_dataset_no_overlap(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), @@ -1448,6 +2402,13 @@ test_write_transformed_filtered_dataset_no_overlap(void) VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + + /* Verify space allocation status */ + plist_id = H5Dget_create_plist(dset_id); + VRFY((plist_id >= 0), "H5Dget_create_plist succeeded"); + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (correct_buf) HDfree(correct_buf); if (read_buf) @@ -1457,6 +2418,7 @@ test_write_transformed_filtered_dataset_no_overlap(void) VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1471,7 +2433,8 @@ test_write_transformed_filtered_dataset_no_overlap(void) * 02/06/2017 */ static void -test_write_3d_filtered_dataset_no_overlap_separate_pages(void) +test_write_3d_filtered_dataset_no_overlap_separate_pages(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -1484,27 +2447,18 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) hsize_t count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS]; hsize_t block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared filtered chunks on separate pages in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS; @@ -1524,20 +2478,22 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, HDF5_DATATYPE_NAME, - filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, + HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -1591,15 +2547,12 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) correct_buf[i] = (C_DATATYPE)((i % (hsize_t)mpi_size) + (i / (hsize_t)mpi_size)); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -1609,10 +2562,10 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -1622,10 +2575,11 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1641,7 +2595,8 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void) * 02/06/2017 */ static void -test_write_3d_filtered_dataset_no_overlap_same_pages(void) +test_write_3d_filtered_dataset_no_overlap_same_pages(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -1654,27 +2609,18 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) hsize_t count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS]; hsize_t block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id, dset_id, plist_id; - hid_t filespace, memspace; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared filtered chunks on the same pages in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS; @@ -1695,20 +2641,22 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -1762,15 +2710,12 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) + (i / (dataset_dims[0] * dataset_dims[1]))); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -1780,10 +2725,10 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -1793,10 +2738,11 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1812,7 +2758,8 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void) * 02/06/2017 */ static void -test_write_3d_filtered_dataset_overlap(void) +test_write_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -1825,27 +2772,18 @@ test_write_3d_filtered_dataset_overlap(void) hsize_t count[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS]; hsize_t block[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to shared filtered chunks in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS; @@ -1865,20 +2803,22 @@ test_write_3d_filtered_dataset_overlap(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, - H5P_DEFAULT, plist_id, H5P_DEFAULT); + dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -1943,15 +2883,12 @@ test_write_3d_filtered_dataset_overlap(void) (i / (hsize_t)(mpi_size * WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS)))); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -1961,10 +2898,10 @@ test_write_3d_filtered_dataset_overlap(void) read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -1974,10 +2911,11 @@ test_write_3d_filtered_dataset_overlap(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -1992,7 +2930,8 @@ test_write_3d_filtered_dataset_overlap(void) * 02/10/2017 */ static void -test_write_cmpd_filtered_dataset_no_conversion_unshared(void) +test_write_cmpd_filtered_dataset_no_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *data = NULL; COMPOUND_C_DATATYPE *read_buf = NULL; @@ -2005,28 +2944,27 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) hsize_t count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS]; hsize_t block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS]; size_t i, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1, memtype = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID, + memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset without Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS; @@ -2045,15 +2983,15 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); /* Create the compound type for memory. */ memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE)); @@ -2066,11 +3004,13 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0), "Datatype insertion succeeded"); - dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, memtype, - filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, + memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -2124,13 +3064,10 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1])); } - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); + VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); - VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); if (data) HDfree(data); @@ -2141,11 +3078,11 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, - H5P_DEFAULT); + dset_id = + H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -2154,11 +3091,12 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -2173,7 +3111,8 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void) * 02/10/2017 */ static void -test_write_cmpd_filtered_dataset_no_conversion_shared(void) +test_write_cmpd_filtered_dataset_no_conversion_shared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *data = NULL; COMPOUND_C_DATATYPE *read_buf = NULL; @@ -2186,28 +3125,27 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) hsize_t count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS]; hsize_t block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS]; size_t i, correct_buf_size; - hid_t file_id, dset_id, plist_id, memtype; - hid_t filespace, memspace; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID, + memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to shared filtered chunks in Compound Datatype dataset without Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS; @@ -2226,15 +3164,15 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); /* Create the compound type for memory. */ memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE)); @@ -2247,11 +3185,13 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0), "Datatype insertion succeeded"); - dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, memtype, + dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -2311,13 +3251,10 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1])); } - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); + VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); - VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); if (data) HDfree(data); @@ -2329,10 +3266,10 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((NULL != read_buf), "HDcalloc succeeded"); dset_id = - H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); + H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -2341,11 +3278,12 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -2356,16 +3294,18 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void) * chunks using a compound datatype which requires a * datatype conversion. * - * NOTE: This test currently should fail because the - * datatype conversion causes the parallel library to - * break to independent I/O and this isn't allowed when - * there are filters in the pipeline. + * NOTE: This test currently should fail for mpi_size > 1 + * because the datatype conversion causes the parallel + * library to break to independent I/O and this isn't + * allowed when there are filters in the pipeline, + * unless there is only one MPI rank. * * Programmer: Jordan Henderson * 02/07/2017 */ static void -test_write_cmpd_filtered_dataset_type_conversion_unshared(void) +test_write_cmpd_filtered_dataset_type_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *data = NULL; COMPOUND_C_DATATYPE *read_buf = NULL; @@ -2378,28 +3318,33 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) hsize_t count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS]; hsize_t block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS]; size_t i, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1, filetype = -1, memtype = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID, + filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset with Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); + /* Skip for MPI communicator size of 1 */ + if (mpi_size == 1) { + SKIPPED(); + return; + } - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS; @@ -2418,15 +3363,15 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); /* Create the compound type for memory. */ memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE)); @@ -2447,11 +3392,13 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded"); VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded"); - dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -2497,20 +3444,16 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) data[i].field3 = (long)GEN_DATA(i); } - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - /* Ensure that this test currently fails since type conversions break collective mode */ H5E_BEGIN_TRY { - VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) < 0), - "Dataset write succeeded"); + VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) < 0), "Dataset write succeeded"); } H5E_END_TRY; + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -2520,11 +3463,11 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, - H5P_DEFAULT); + dset_id = + H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -2533,12 +3476,13 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -2549,16 +3493,18 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void) * chunks using a compound datatype which requires * a datatype conversion. * - * NOTE: This test currently should fail because the - * datatype conversion causes the parallel library to - * break to independent I/O and this isn't allowed when - * there are filters in the pipeline. + * NOTE: This test currently should fail for mpi_size > 1 + * because the datatype conversion causes the parallel + * library to break to independent I/O and this isn't + * allowed when there are filters in the pipeline, + * unless there is only one MPI rank. * * Programmer: Jordan Henderson * 02/10/2017 */ static void -test_write_cmpd_filtered_dataset_type_conversion_shared(void) +test_write_cmpd_filtered_dataset_type_conversion_shared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *data = NULL; COMPOUND_C_DATATYPE *read_buf = NULL; @@ -2571,28 +3517,33 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) hsize_t count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS]; hsize_t block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS]; size_t i, correct_buf_size; - hid_t file_id, dset_id, plist_id, filetype, memtype; - hid_t filespace, memspace; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs( "Testing write to shared filtered chunks in Compound Datatype dataset with Datatype conversion"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); + /* Skip for MPI communicator size of 1 */ + if (mpi_size == 1) { + SKIPPED(); + return; + } - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS; @@ -2611,15 +3562,15 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); /* Create the compound type for memory. */ memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE)); @@ -2640,11 +3591,13 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded"); VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded"); - dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -2690,20 +3643,16 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) data[i].field3 = (long)GEN_DATA(i); } - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - /* Ensure that this test currently fails since type conversions break collective mode */ H5E_BEGIN_TRY { - VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) < 0), - "Dataset write succeeded"); + VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) < 0), "Dataset write succeeded"); } H5E_END_TRY; + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN); + if (data) HDfree(data); @@ -2713,11 +3662,11 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, - H5P_DEFAULT); + dset_id = + H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -2726,12 +3675,13 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) if (read_buf) HDfree(read_buf); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -2751,7 +3701,8 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void) * 05/14/2018 */ static void -test_read_one_chunk_filtered_dataset(void) +test_read_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -2765,16 +3716,15 @@ test_read_one_chunk_filtered_dataset(void) hsize_t block[READ_ONE_CHUNK_FILTERED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from one-chunk filtered dataset"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS; dataset_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NCOLS; @@ -2802,6 +3752,9 @@ test_read_one_chunk_filtered_dataset(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_ONE_CHUNK_FILTERED_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -2810,44 +3763,43 @@ test_read_one_chunk_filtered_dataset(void) chunk_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS; chunk_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_ONE_CHUNK_FILTERED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS / (hsize_t)mpi_size; @@ -2887,18 +3839,12 @@ test_read_one_chunk_filtered_dataset(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -2937,7 +3883,7 @@ test_read_one_chunk_filtered_dataset(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -2956,7 +3902,8 @@ test_read_one_chunk_filtered_dataset(void) * 05/15/2018 */ static void -test_read_filtered_dataset_no_overlap(void) +test_read_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -2970,16 +3917,15 @@ test_read_filtered_dataset_no_overlap(void) hsize_t block[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from unshared filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS; @@ -3006,6 +3952,9 @@ test_read_filtered_dataset_no_overlap(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -3014,44 +3963,43 @@ test_read_filtered_dataset_no_overlap(void) chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS; @@ -3091,18 +4039,12 @@ test_read_filtered_dataset_no_overlap(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -3141,7 +4083,7 @@ test_read_filtered_dataset_no_overlap(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -3161,7 +4103,8 @@ test_read_filtered_dataset_no_overlap(void) * 05/15/2018 */ static void -test_read_filtered_dataset_overlap(void) +test_read_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -3175,16 +4118,15 @@ test_read_filtered_dataset_overlap(void) hsize_t block[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from shared filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NCOLS; @@ -3211,6 +4153,9 @@ test_read_filtered_dataset_overlap(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -3219,44 +4164,43 @@ test_read_filtered_dataset_overlap(void) chunk_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, - H5P_DEFAULT, plist_id, H5P_DEFAULT); + dset_id = H5Dcreate2(group_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; @@ -3296,18 +4240,12 @@ test_read_filtered_dataset_overlap(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -3362,7 +4300,7 @@ test_read_filtered_dataset_overlap(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -3382,7 +4320,8 @@ test_read_filtered_dataset_overlap(void) * 05/15/2018 */ static void -test_read_filtered_dataset_single_no_selection(void) +test_read_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -3397,16 +4336,15 @@ test_read_filtered_dataset_single_no_selection(void) hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; size_t segment_length; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from filtered chunks with a single process having no selection"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS; @@ -3437,6 +4375,9 @@ test_read_filtered_dataset_single_no_selection(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); @@ -3446,44 +4387,43 @@ test_read_filtered_dataset_single_no_selection(void) chunk_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS; @@ -3530,19 +4470,19 @@ test_read_filtered_dataset_single_no_selection(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), - "Dataset read succeeded"); + if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC) { + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, NULL) >= 0), + "Dataset read succeeded"); + } + else { + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + } global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != global_buf), "HDcalloc succeeded"); @@ -3588,7 +4528,7 @@ test_read_filtered_dataset_single_no_selection(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -3609,7 +4549,8 @@ test_read_filtered_dataset_single_no_selection(void) * 05/15/2018 */ static void -test_read_filtered_dataset_all_no_selection(void) +test_read_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -3617,14 +4558,13 @@ test_read_filtered_dataset_all_no_selection(void) hsize_t chunk_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t sel_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS]; size_t read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing read from filtered chunks with all processes having no selection"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NCOLS; @@ -3646,6 +4586,9 @@ test_read_filtered_dataset_all_no_selection(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -3654,44 +4597,43 @@ test_read_filtered_dataset_all_no_selection(void) chunk_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = sel_dims[1] = 0; @@ -3705,20 +4647,16 @@ test_read_filtered_dataset_all_no_selection(void) VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); + if (read_buf) HDfree(read_buf); if (correct_buf) @@ -3727,7 +4665,7 @@ test_read_filtered_dataset_all_no_selection(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -3747,7 +4685,8 @@ test_read_filtered_dataset_all_no_selection(void) * 05/15/2018 */ static void -test_read_filtered_dataset_point_selection(void) +test_read_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *correct_buf = NULL; C_DATATYPE *read_buf = NULL; @@ -3759,16 +4698,15 @@ test_read_filtered_dataset_point_selection(void) hsize_t flat_dims[1]; size_t i, j, read_buf_size, correct_buf_size; size_t num_points; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from filtered chunks with point selection"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS; @@ -3795,6 +4733,9 @@ test_read_filtered_dataset_point_selection(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -3803,44 +4744,43 @@ test_read_filtered_dataset_point_selection(void) chunk_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS / (hsize_t)mpi_size; @@ -3871,18 +4811,12 @@ test_read_filtered_dataset_point_selection(void) VRFY((H5Sselect_elements(filespace, H5S_SELECT_SET, (hsize_t)num_points, (const hsize_t *)coords) >= 0), "Point selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -3941,7 +4875,7 @@ test_read_filtered_dataset_point_selection(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -3964,7 +4898,8 @@ test_read_filtered_dataset_point_selection(void) * 05/15/2018 */ static void -test_read_filtered_dataset_interleaved_read(void) +test_read_filtered_dataset_interleaved_read(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -3978,16 +4913,15 @@ test_read_filtered_dataset_interleaved_read(void) hsize_t block[INTERLEAVED_READ_FILTERED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing interleaved read from filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NROWS; dataset_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS; @@ -4023,6 +4957,9 @@ test_read_filtered_dataset_interleaved_read(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(INTERLEAVED_READ_FILTERED_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -4031,44 +4968,43 @@ test_read_filtered_dataset_interleaved_read(void) chunk_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS; chunk_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, INTERLEAVED_READ_FILTERED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" INTERLEAVED_READ_FILTERED_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NROWS / mpi_size); @@ -4110,18 +5046,12 @@ test_read_filtered_dataset_interleaved_read(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -4176,7 +5106,7 @@ test_read_filtered_dataset_interleaved_read(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -4196,7 +5126,8 @@ test_read_filtered_dataset_interleaved_read(void) * 05/16/2018 */ static void -test_read_3d_filtered_dataset_no_overlap_separate_pages(void) +test_read_3d_filtered_dataset_no_overlap_separate_pages(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { MPI_Datatype vector_type; MPI_Datatype resized_vector_type; @@ -4212,14 +5143,13 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) hsize_t block[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing read from unshared filtered chunks on separate pages in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS; dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS; dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DEPTH; @@ -4245,6 +5175,9 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, dataset_dims, NULL); @@ -4255,45 +5188,44 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS; chunk_dims[2] = 1; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY( (H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS; @@ -4340,18 +5272,12 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -4392,7 +5318,7 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -4419,7 +5345,8 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void) * 08/20/2021 */ static void -test_read_transformed_filtered_dataset_no_overlap(void) +test_read_transformed_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -4433,16 +5360,15 @@ test_read_transformed_filtered_dataset_no_overlap(void) hsize_t block[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from unshared transformed and filtered chunks"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS; dataset_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS; @@ -4469,6 +5395,9 @@ test_read_transformed_filtered_dataset_no_overlap(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL); @@ -4478,20 +5407,23 @@ test_read_transformed_filtered_dataset_no_overlap(void) chunk_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS; chunk_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY( (H5Pset_chunk(plist_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); @@ -4505,26 +5437,26 @@ test_read_transformed_filtered_dataset_no_overlap(void) VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, correct_buf) >= 0), "Dataset write succeeded"); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + + /* Verify space allocation status */ + plist_id = H5Dget_create_plist(dset_id); + VRFY((plist_id >= 0), "H5Dget_create_plist succeeded"); + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS; @@ -4565,11 +5497,9 @@ test_read_transformed_filtered_dataset_no_overlap(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read and data transform */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); + /* Create property list for data transform */ + plist_id = H5Pcopy(dxpl_id); + VRFY((plist_id >= 0), "DXPL copy succeeded"); /* Set data transform expression */ VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded"); @@ -4619,6 +5549,7 @@ test_read_transformed_filtered_dataset_no_overlap(void) VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -4639,7 +5570,8 @@ test_read_transformed_filtered_dataset_no_overlap(void) * 05/16/2018 */ static void -test_read_3d_filtered_dataset_no_overlap_same_pages(void) +test_read_3d_filtered_dataset_no_overlap_same_pages(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *read_buf = NULL; C_DATATYPE *correct_buf = NULL; @@ -4653,16 +5585,15 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) hsize_t block[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id, dset_id, plist_id; - hid_t filespace, memspace; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from unshared filtered chunks on the same pages in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS; dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS; dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH; @@ -4689,6 +5620,9 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, dataset_dims, NULL); @@ -4699,45 +5633,44 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS; chunk_dims[2] = 1; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS; @@ -4783,18 +5716,12 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -4833,7 +5760,7 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -4854,7 +5781,8 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void) * 05/16/2018 */ static void -test_read_3d_filtered_dataset_overlap(void) +test_read_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { MPI_Datatype vector_type; MPI_Datatype resized_vector_type; @@ -4870,14 +5798,13 @@ test_read_3d_filtered_dataset_overlap(void) hsize_t block[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing read from shared filtered chunks in 3D dataset"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NROWS; dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NCOLS; dataset_dims[2] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH; @@ -4916,6 +5843,9 @@ test_read_3d_filtered_dataset_overlap(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, dataset_dims, NULL); VRFY((filespace >= 0), "File dataspace creation succeeded"); @@ -4925,44 +5855,43 @@ test_read_3d_filtered_dataset_overlap(void) chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS; chunk_dims[2] = 1; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME, + dset_id = H5Dcreate2(group_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size); @@ -5007,18 +5936,12 @@ test_read_3d_filtered_dataset_overlap(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size); @@ -5068,7 +5991,7 @@ test_read_3d_filtered_dataset_overlap(void) VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -5088,7 +6011,8 @@ test_read_3d_filtered_dataset_overlap(void) * 05/17/2018 */ static void -test_read_cmpd_filtered_dataset_no_conversion_unshared(void) +test_read_cmpd_filtered_dataset_no_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *read_buf = NULL; COMPOUND_C_DATATYPE *correct_buf = NULL; @@ -5102,16 +6026,23 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) hsize_t block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1, memtype = -1; - hid_t filespace = -1, memspace = -1; - int * recvcounts = NULL; - int * displs = NULL; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID, + memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; + int * recvcounts = NULL; + int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset without Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS; dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NCOLS; @@ -5153,6 +6084,9 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS, dataset_dims, NULL); @@ -5162,46 +6096,45 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS; chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); dset_id = - H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT); + H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS; @@ -5241,18 +6174,12 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != global_buf), "HDcalloc succeeded"); @@ -5291,7 +6218,7 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -5311,7 +6238,8 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void) * 05/17/2018 */ static void -test_read_cmpd_filtered_dataset_no_conversion_shared(void) +test_read_cmpd_filtered_dataset_no_conversion_shared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *read_buf = NULL; COMPOUND_C_DATATYPE *correct_buf = NULL; @@ -5325,16 +6253,23 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) hsize_t block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id, dset_id, plist_id, memtype; - hid_t filespace, memspace; - int * recvcounts = NULL; - int * displs = NULL; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID, + memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; + int * recvcounts = NULL; + int * displs = NULL; if (MAINPROCESS) HDputs("Testing read from shared filtered chunks in Compound Datatype dataset without Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS; dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NCOLS; @@ -5382,6 +6317,9 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS, dataset_dims, NULL); @@ -5391,46 +6329,45 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS; chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); dset_id = - H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); + H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size; @@ -5470,18 +6407,12 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != global_buf), "HDcalloc succeeded"); @@ -5520,7 +6451,7 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -5540,7 +6471,8 @@ test_read_cmpd_filtered_dataset_no_conversion_shared(void) * 05/17/2018 */ static void -test_read_cmpd_filtered_dataset_type_conversion_unshared(void) +test_read_cmpd_filtered_dataset_type_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *read_buf = NULL; COMPOUND_C_DATATYPE *correct_buf = NULL; @@ -5554,8 +6486,10 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) hsize_t block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1, filetype = -1, memtype = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; @@ -5563,7 +6497,12 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset with Datatype " "conversion"); - CHECK_CUR_FILTER_AVAIL(); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS; dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NCOLS; @@ -5613,6 +6552,9 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS, dataset_dims, NULL); @@ -5622,46 +6564,45 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS; chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, - H5P_DEFAULT); + dset_id = + H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS; @@ -5701,18 +6642,12 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != global_buf), "HDcalloc succeeded"); @@ -5752,7 +6687,7 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -5772,7 +6707,8 @@ test_read_cmpd_filtered_dataset_type_conversion_unshared(void) * 05/17/2018 */ static void -test_read_cmpd_filtered_dataset_type_conversion_shared(void) +test_read_cmpd_filtered_dataset_type_conversion_shared(const char *parent_group, H5Z_filter_t filter_id, + hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id) { COMPOUND_C_DATATYPE *read_buf = NULL; COMPOUND_C_DATATYPE *correct_buf = NULL; @@ -5786,8 +6722,10 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) hsize_t block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS]; hsize_t flat_dims[1]; size_t i, read_buf_size, correct_buf_size; - hid_t file_id, dset_id, plist_id, filetype, memtype; - hid_t filespace, memspace; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; int * recvcounts = NULL; int * displs = NULL; @@ -5795,7 +6733,12 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) HDputs( "Testing read from shared filtered chunks in Compound Datatype dataset with Datatype conversion"); - CHECK_CUR_FILTER_AVAIL(); + /* SZIP and ScaleOffset filters don't support compound types */ + if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) { + if (MAINPROCESS) + SKIPPED(); + return; + } dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS; dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NCOLS; @@ -5851,6 +6794,9 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS, dataset_dims, NULL); @@ -5860,46 +6806,45 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS; chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS; - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, + dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); dset_id = - H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); + H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size; @@ -5939,18 +6884,12 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset read */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - read_buf_size = flat_dims[0] * sizeof(*read_buf); read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size); VRFY((NULL != read_buf), "HDcalloc succeeded"); - VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded"); + VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); VRFY((NULL != global_buf), "HDcalloc succeeded"); @@ -5989,7 +6928,7 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -6006,7 +6945,8 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void) * 08/03/2017 */ static void -test_write_serial_read_parallel(void) +test_write_serial_read_parallel(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -6014,14 +6954,13 @@ test_write_serial_read_parallel(void) hsize_t dataset_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS]; hsize_t chunk_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write file serially; read file in parallel"); - CHECK_CUR_FILTER_AVAIL(); - dataset_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NROWS; dataset_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NCOLS; dataset_dims[2] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_DEPTH; @@ -6040,6 +6979,9 @@ test_write_serial_read_parallel(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + /* Create the dataspace for the dataset */ chunk_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NROWS; chunk_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NCOLS; @@ -6049,20 +6991,22 @@ test_write_serial_read_parallel(void) VRFY((filespace >= 0), "File dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); data_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*data); @@ -6076,10 +7020,15 @@ test_write_serial_read_parallel(void) VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); } @@ -6095,28 +7044,16 @@ test_write_serial_read_parallel(void) correct_buf[i] = (long)i; /* All ranks open the file and verify their "portion" of the dataset is correct */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, H5P_DEFAULT); + dset_id = H5Dopen2(group_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded"); VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); @@ -6127,13 +7064,13 @@ test_write_serial_read_parallel(void) HDfree(read_buf); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; } -#if MPI_VERSION >= 3 +#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES /* * Tests parallel write of filtered data * to a dataset. After the write has @@ -6145,7 +7082,8 @@ test_write_serial_read_parallel(void) * 08/03/2017 */ static void -test_write_parallel_read_serial(void) +test_write_parallel_read_serial(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, + hid_t dcpl_id, hid_t dxpl_id) { C_DATATYPE *data = NULL; C_DATATYPE *read_buf = NULL; @@ -6158,27 +7096,18 @@ test_write_parallel_read_serial(void) hsize_t block[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS]; hsize_t offset[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS]; size_t i, data_size, correct_buf_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing write file in parallel; read serially"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NROWS; @@ -6198,20 +7127,22 @@ test_write_parallel_read_serial(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + dset_id = H5Dcreate2(group_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* Each process defines the dataset selection in memory and writes @@ -6256,22 +7187,20 @@ test_write_parallel_read_serial(void) for (i = 0; i < data_size / sizeof(*data); i++) data[i] = (C_DATATYPE)GEN_DATA(i); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - - VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + if (data) HDfree(data); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); if (MAINPROCESS) { @@ -6286,7 +7215,10 @@ test_write_parallel_read_serial(void) VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); - dset_id = H5Dopen2(file_id, "/" WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, H5P_DEFAULT); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + dset_id = H5Dopen2(group_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset open succeeded"); correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf); @@ -6307,6 +7239,7 @@ test_write_parallel_read_serial(void) VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); HDfree(correct_buf); @@ -6326,9 +7259,11 @@ test_write_parallel_read_serial(void) * 06/04/2018 */ static void -test_shrinking_growing_chunks(void) +test_shrinking_growing_chunks(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) { - double *data = NULL; + double *data = NULL; + double *read_buf = NULL; hsize_t dataset_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS]; hsize_t chunk_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS]; hsize_t sel_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS]; @@ -6337,27 +7272,18 @@ test_shrinking_growing_chunks(void) hsize_t count[SHRINKING_GROWING_CHUNKS_DATASET_DIMS]; hsize_t block[SHRINKING_GROWING_CHUNKS_DATASET_DIMS]; size_t i, data_size; - hid_t file_id = -1, dset_id = -1, plist_id = -1; - hid_t filespace = -1, memspace = -1; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID; if (MAINPROCESS) HDputs("Testing continually shrinking/growing chunks"); - CHECK_CUR_FILTER_AVAIL(); - - /* Set up file access property list with parallel I/O access */ - plist_id = H5Pcreate(H5P_FILE_ACCESS); - VRFY((plist_id >= 0), "FAPL creation succeeded"); - - VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); - - file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id); + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); VRFY((file_id >= 0), "Test file open succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded"); + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); /* Create the dataspace for the dataset */ dataset_dims[0] = (hsize_t)SHRINKING_GROWING_CHUNKS_NROWS; @@ -6374,19 +7300,21 @@ test_shrinking_growing_chunks(void) VRFY((memspace >= 0), "Memory dataspace creation succeeded"); /* Create chunked dataset */ - plist_id = H5Pcreate(H5P_DATASET_CREATE); - VRFY((plist_id >= 0), "DCPL creation succeeded"); + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); VRFY((H5Pset_chunk(plist_id, SHRINKING_GROWING_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); /* Add test filter to the pipeline */ - VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set"); + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); - dset_id = H5Dcreate2(file_id, SHRINKING_GROWING_CHUNKS_DATASET_NAME, H5T_NATIVE_DOUBLE, filespace, + dset_id = H5Dcreate2(group_id, SHRINKING_GROWING_CHUNKS_DATASET_NAME, H5T_NATIVE_DOUBLE, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); VRFY((dset_id >= 0), "Dataset creation succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); /* @@ -6417,39 +7345,1302 @@ test_shrinking_growing_chunks(void) VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), "Hyperslab selection succeeded"); - /* Create property list for collective dataset write */ - plist_id = H5Pcreate(H5P_DATASET_XFER); - VRFY((plist_id >= 0), "DXPL creation succeeded"); - - VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded"); - data_size = sel_dims[0] * sel_dims[1] * sizeof(double); data = (double *)HDcalloc(1, data_size); VRFY((NULL != data), "HDcalloc succeeded"); + read_buf = (double *)HDcalloc(1, data_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + for (i = 0; i < SHRINKING_GROWING_CHUNKS_NLOOPS; i++) { /* Continually write random float data, followed by zeroed-out data */ - if ((i % 2)) + if (i % 2) HDmemset(data, 0, data_size); else { size_t j; for (j = 0; j < data_size / sizeof(*data); j++) { - data[j] = (float)(rand() / (double)(RAND_MAX / (double)1.0L)); + data[j] = (rand() / (double)(RAND_MAX / (double)1.0L)); } } - VRFY((H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, plist_id, data) >= 0), + VRFY((H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + if (i % 2) { + HDmemset(read_buf, 255, data_size); + } + else { + HDmemset(read_buf, 0, data_size); + } + + VRFY((H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "data verification succeeded"); } + if (read_buf) + HDfree(read_buf); if (data) HDfree(data); + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded"); - VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests that filtered and unfiltered partial edge chunks can be + * written to and read from correctly in parallel when only one MPI + * rank writes to a particular partial edge chunk in the dataset. + * + * The dataset contains partial edge chunks in the second dimension. + * Each MPI rank selects a hyperslab in the shape of a single chunk + * that is offset to cover the whole edge chunk and part of the + * full chunk next to the edge chunk. + */ +static void +test_edge_chunks_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t sel_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t start[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t stride[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t count[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t block[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to unshared filtered edge chunks"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NROWS; + dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS; + chunk_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + sel_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + sel_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + + filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + block[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + block[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + start[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS); + start[1] = + (hsize_t)(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS); + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + read_buf = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1) ? SOME_CHUNKS_WRITTEN : ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify the correct data was written */ + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Repeat the previous, but set option to not filter partial edge chunks */ + if (MAINPROCESS) + HDputs("Testing write to unshared unfiltered edge chunks"); + + H5Pset_chunk_opts(plist_id, H5D_CHUNK_DONT_FILTER_PARTIAL_CHUNKS); + + dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + block[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + block[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + start[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS); + start[1] = + (hsize_t)(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS); + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1) ? SOME_CHUNKS_WRITTEN : ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify the correct data was written */ + dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests that filtered and unfiltered partial edge chunks can be + * written to and read from correctly in parallel when every MPI + * rank writes to every partial edge chunk in the dataset. + * + * The dataset contains partial edge chunks in the second dimension. + * Each MPI rank selects a hyperslab in the shape of one row of each + * chunk that is offset in the second dimension to cover the whole + * edge chunk and part of the full chunk next to the edge chunk. + */ +static void +test_edge_chunks_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + hsize_t dataset_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t chunk_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t sel_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t start[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t stride[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t count[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + hsize_t block[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS]; + size_t i, data_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing write to shared filtered edge chunks"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS; + dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS; + chunk_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + chunk_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + + filespace = H5Screate_simple(WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), + "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = + (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS / WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS); + count[1] = 1; + stride[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + stride[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + block[0] = (hsize_t)1; + block[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + start[0] = (hsize_t)mpi_rank; + start[1] = + (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS); + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + read_buf = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify the correct data was written */ + dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Repeat the previous, but set option to not filter partial edge chunks */ + if (MAINPROCESS) + HDputs("Testing write to shared unfiltered edge chunks"); + + H5Pset_chunk_opts(plist_id, H5D_CHUNK_DONT_FILTER_PARTIAL_CHUNKS); + + dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, HDF5_DATATYPE_NAME, + filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = + (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS / WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS); + count[1] = 1; + stride[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS; + stride[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + block[0] = (hsize_t)1; + block[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS; + start[0] = (hsize_t)mpi_rank; + start[1] = + (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS); + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify the correct data was written */ + dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + HDmemset(read_buf, 255, data_size); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded"); + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests that filtered and unfiltered partial edge chunks can be + * written to and read from correctly in parallel when only one + * MPI rank writes to a particular edge chunk in the dataset and + * only performs a partial write to the edge chunk. + * + * The dataset contains partial edge chunks in the second dimension. + * Each MPI rank selects a hyperslab in the shape of part of a single + * edge chunk and writes to just a portion of the edge chunk. + */ +static void +test_edge_chunks_partial_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + /* TODO */ +} + +/* + * Tests that the parallel compression feature correctly handles + * writing fill values to a dataset and reading fill values from + * unallocated parts of a dataset. + */ +static void +test_fill_values(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + C_DATATYPE *correct_buf = NULL; + C_DATATYPE fill_value; + hsize_t dataset_dims[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t chunk_dims[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t sel_dims[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t start[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t stride[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t count[FILL_VALUES_TEST_DATASET_DIMS]; + hsize_t block[FILL_VALUES_TEST_DATASET_DIMS]; + size_t i, data_size, read_buf_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + int * recvcounts = NULL; + int * displs = NULL; + + if (MAINPROCESS) + HDputs("Testing fill values"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)FILL_VALUES_TEST_NROWS; + dataset_dims[1] = (hsize_t)FILL_VALUES_TEST_NCOLS; + chunk_dims[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS; + chunk_dims[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR; + + filespace = H5Screate_simple(FILL_VALUES_TEST_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, FILL_VALUES_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + /* Set a fill value */ + fill_value = FILL_VALUES_TEST_FILL_VAL; + VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, &fill_value) >= 0), "Fill Value set"); + + dset_id = H5Dcreate2(group_id, FILL_VALUES_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, + plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Allocate buffer for reading entire dataset */ + read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf); + + read_buf = HDcalloc(1, read_buf_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + correct_buf = HDcalloc(1, read_buf_size); + VRFY((NULL != correct_buf), "HDcalloc succeeded"); + + /* Read entire dataset and verify that the fill value is returned */ + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + correct_buf[i] = FILL_VALUES_TEST_FILL_VAL; + + VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to part of the first chunk in the dataset with + * all ranks, then read the whole dataset and ensure that + * the fill value is returned for the unwritten part of + * the chunk, as well as for the rest of the dataset that + * hasn't been written to yet. + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + block[0] = 1; + block[1] = (hsize_t)(FILL_VALUES_TEST_CH_NCOLS - 1); + start[0] = (hsize_t)mpi_rank; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* + * Each MPI rank communicates their written piece of data + * into each other rank's correctness-checking buffer + */ + recvcounts = HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts)); + VRFY((NULL != recvcounts), "HDcalloc succeeded"); + + displs = HDcalloc(1, (size_t)mpi_size * sizeof(*displs)); + VRFY((NULL != displs), "HDcalloc succeeded"); + + for (i = 0; i < (size_t)mpi_size; i++) { + recvcounts[i] = (int)(count[1] * block[1]); + displs[i] = (int)(i * dataset_dims[1]); + } + + VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, correct_buf, recvcounts, + displs, C_DATATYPE_MPI, comm)), + "MPI_Allgatherv succeeded"); + + VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to whole dataset and ensure fill value isn't returned + * after reading whole dataset back + */ + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (hsize_t)FILL_VALUES_TEST_NROWS / (hsize_t)FILL_VALUES_TEST_CH_NROWS; + count[1] = (hsize_t)FILL_VALUES_TEST_NCOLS / (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + block[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS / (hsize_t)mpi_size; + block[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + start[0] = (hsize_t)mpi_rank * block[0]; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + VRFY((read_buf[i] != FILL_VALUES_TEST_FILL_VAL), "Data verification succeeded"); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /******************************************************************** + * Set the fill time to H5D_FILL_TIME_ALLOC and repeat the previous * + ********************************************************************/ + + VRFY((H5Pset_fill_time(plist_id, H5D_FILL_TIME_ALLOC) >= 0), "H5Pset_fill_time succeeded"); + + dset_id = H5Dcreate2(group_id, FILL_VALUES_TEST_DATASET_NAME2, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, + plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Read entire dataset and verify that the fill value is returned */ + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + correct_buf[i] = FILL_VALUES_TEST_FILL_VAL; + + VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to part of the first chunk in the dataset with + * all ranks, then read the whole dataset and ensure that + * the fill value is returned for the unwritten part of + * the chunk, as well as for the rest of the dataset that + * hasn't been written to yet. + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + block[0] = 1; + block[1] = (hsize_t)(FILL_VALUES_TEST_CH_NCOLS - 1); + start[0] = (hsize_t)mpi_rank; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME2, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < (size_t)mpi_size; i++) { + recvcounts[i] = (int)(count[1] * block[1]); + displs[i] = (int)(i * dataset_dims[1]); + } + + /* + * Each MPI rank communicates their written piece of data + * into each other rank's correctness-checking buffer + */ + VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, correct_buf, recvcounts, + displs, C_DATATYPE_MPI, comm)), + "MPI_Allgatherv succeeded"); + + VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to whole dataset and ensure fill value isn't returned + * after reading whole dataset back + */ + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (hsize_t)FILL_VALUES_TEST_NROWS / (hsize_t)FILL_VALUES_TEST_CH_NROWS; + count[1] = (hsize_t)FILL_VALUES_TEST_NCOLS / (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + block[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS / (hsize_t)mpi_size; + block[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS; + start[0] = (hsize_t)mpi_rank * block[0]; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME2, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + VRFY((read_buf[i] != FILL_VALUES_TEST_FILL_VAL), "Data verification succeeded"); + + if (displs) + HDfree(displs); + if (recvcounts) + HDfree(recvcounts); + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + if (correct_buf) + HDfree(correct_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests that the parallel compression feature can handle + * an undefined fill value. Nothing is verified in this + * test since the fill value isn't defined. + */ +static void +test_fill_value_undefined(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + H5D_alloc_time_t alloc_time; + C_DATATYPE * data = NULL; + C_DATATYPE * read_buf = NULL; + hsize_t dataset_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t chunk_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t sel_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t start[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t stride[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t count[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + hsize_t block[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS]; + size_t i, data_size, read_buf_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + + if (MAINPROCESS) + HDputs("Testing undefined fill value"); + + VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded"); + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NROWS; + dataset_dims[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NCOLS; + chunk_dims[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS; + chunk_dims[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR; + + filespace = H5Screate_simple(FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + /* Set an undefined fill value */ + VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, NULL) >= 0), "Fill Value set"); + + dset_id = H5Dcreate2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Allocate buffer for reading entire dataset */ + read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf); + + read_buf = HDcalloc(1, read_buf_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + /* + * Read entire dataset - nothing to verify since there's no fill value. + * If not using early space allocation, the read should fail since storage + * isn't allocated yet and no fill value is defined. + */ + if (alloc_time == H5D_ALLOC_TIME_EARLY) { + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + } + else { + H5E_BEGIN_TRY + { + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) < 0), + "Dataset read succeeded"); + } + H5E_END_TRY; + } + + /* + * Write to part of the first chunk in the dataset with + * all ranks, then read the whole dataset. Don't verify + * anything since there's no fill value defined. + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS; + block[0] = 1; + block[1] = (hsize_t)(FILL_VALUE_UNDEFINED_TEST_CH_NCOLS - 1); + start[0] = (hsize_t)mpi_rank; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + dset_id = H5Dopen2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* + * Write to whole dataset and ensure data is correct + * after reading whole dataset back + */ + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NROWS / (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS; + count[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NCOLS / (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS; + stride[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS; + block[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS / (hsize_t)mpi_size; + block[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS; + start[0] = (hsize_t)mpi_rank * block[0]; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); + VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + + return; +} + +/* + * Tests that the parallel compression feature correctly handles + * avoiding writing fill values to a dataset when the fill time + * is set as H5D_FILL_TIME_NEVER. + */ +static void +test_fill_time_never(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id, + hid_t dxpl_id) +{ + C_DATATYPE *data = NULL; + C_DATATYPE *read_buf = NULL; + C_DATATYPE *fill_buf = NULL; + C_DATATYPE fill_value; + hsize_t dataset_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t chunk_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t sel_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t start[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t stride[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t count[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + hsize_t block[FILL_TIME_NEVER_TEST_DATASET_DIMS]; + size_t i, data_size, read_buf_size; + hid_t file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t filespace = H5I_INVALID_HID; + int * recvcounts = NULL; + int * displs = NULL; + + if (MAINPROCESS) + HDputs("Testing fill time H5D_FILL_TIME_NEVER"); + + /* + * Only run this test when incremental file space allocation is + * used, as HDF5's chunk allocation code always writes fill values + * when filters are in the pipeline, but parallel compression does + * incremental file space allocation differently. + */ + { + H5D_alloc_time_t alloc_time; + + VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded"); + + if (alloc_time != H5D_ALLOC_TIME_INCR) { + if (MAINPROCESS) + SKIPPED(); + return; + } + } + + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "Test file open succeeded"); + + group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gopen2 succeeded"); + + /* Create the dataspace for the dataset */ + dataset_dims[0] = (hsize_t)FILL_TIME_NEVER_TEST_NROWS; + dataset_dims[1] = (hsize_t)FILL_TIME_NEVER_TEST_NCOLS; + chunk_dims[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS; + chunk_dims[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS; + sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR; + sel_dims[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR; + + filespace = H5Screate_simple(FILL_TIME_NEVER_TEST_DATASET_DIMS, dataset_dims, NULL); + VRFY((filespace >= 0), "File dataspace creation succeeded"); + + /* Create chunked dataset */ + plist_id = H5Pcopy(dcpl_id); + VRFY((plist_id >= 0), "DCPL copy succeeded"); + + VRFY((H5Pset_chunk(plist_id, FILL_TIME_NEVER_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set"); + + /* Add test filter to the pipeline */ + VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set"); + + /* Set a fill value */ + fill_value = FILL_VALUES_TEST_FILL_VAL; + VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, &fill_value) >= 0), "Fill Value set"); + + /* Set fill time of 'never' */ + VRFY((H5Pset_fill_time(plist_id, H5D_FILL_TIME_NEVER) >= 0), "H5Pset_fill_time succeeded"); + + dset_id = H5Dcreate2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, + H5P_DEFAULT, plist_id, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset creation succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED); + + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + + /* Allocate buffer for reading entire dataset */ + read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf); + + read_buf = HDcalloc(1, read_buf_size); + VRFY((NULL != read_buf), "HDcalloc succeeded"); + + fill_buf = HDcalloc(1, read_buf_size); + VRFY((NULL != fill_buf), "HDcalloc succeeded"); + + /* Read entire dataset and verify that the fill value isn't returned */ + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + fill_buf[i] = FILL_TIME_NEVER_TEST_FILL_VAL; + + /* + * It should be very unlikely for the dataset's random + * values to all be the fill value, so this should be + * a safe comparison in theory. + */ + VRFY((0 != HDmemcmp(read_buf, fill_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to part of the first chunk in the dataset with + * all ranks, then read the whole dataset and ensure that + * the fill value isn't returned for the unwritten part of + * the chunk, as well as for the rest of the dataset that + * hasn't been written to yet. + */ + count[0] = 1; + count[1] = 1; + stride[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS; + block[0] = 1; + block[1] = (hsize_t)(FILL_TIME_NEVER_TEST_CH_NCOLS - 1); + start[0] = (hsize_t)mpi_rank; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + /* Select hyperslab in the file */ + filespace = H5Dget_space(dset_id); + VRFY((filespace >= 0), "File dataspace retrieval succeeded"); + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + /* Fill data buffer */ + data_size = sel_dims[0] * sel_dims[1] * sizeof(*data); + + data = (C_DATATYPE *)HDcalloc(1, data_size); + VRFY((NULL != data), "HDcalloc succeeded"); + + for (i = 0; i < data_size / sizeof(*data); i++) + data[i] = (C_DATATYPE)GEN_DATA(i); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + /* + * Each MPI rank communicates their written piece of data + * into each other rank's correctness-checking buffer + */ + recvcounts = HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts)); + VRFY((NULL != recvcounts), "HDcalloc succeeded"); + + displs = HDcalloc(1, (size_t)mpi_size * sizeof(*displs)); + VRFY((NULL != displs), "HDcalloc succeeded"); + + for (i = 0; i < (size_t)mpi_size; i++) { + recvcounts[i] = (int)(count[1] * block[1]); + displs[i] = (int)(i * dataset_dims[1]); + } + + VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, fill_buf, recvcounts, + displs, C_DATATYPE_MPI, comm)), + "MPI_Allgatherv succeeded"); + + /* + * It should be very unlikely for the dataset's random + * values to all be the fill value, so this should be + * a safe comparison in theory. + */ + VRFY((0 != HDmemcmp(read_buf, fill_buf, read_buf_size)), "Data verification succeeded"); + + /* + * Write to whole dataset and ensure fill value isn't returned + * after reading whole dataset back + */ + + /* Each process defines the dataset selection in memory and writes + * it to the hyperslab in the file + */ + count[0] = (hsize_t)FILL_TIME_NEVER_TEST_NROWS / (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS; + count[1] = (hsize_t)FILL_TIME_NEVER_TEST_NCOLS / (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS; + stride[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS; + stride[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS; + block[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS / (hsize_t)mpi_size; + block[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS; + start[0] = (hsize_t)mpi_rank * block[0]; + start[1] = 0; + + if (VERBOSE_MED) { + HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE + ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE + ", %" PRIuHSIZE " ]\n", + mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]); + HDfflush(stdout); + } + + VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0), + "Hyperslab selection succeeded"); + + VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0), + "Dataset write succeeded"); + + /* Verify space allocation status */ + verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN); + + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + + /* Verify correct data was written */ + dset_id = H5Dopen2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, H5P_DEFAULT); + VRFY((dset_id >= 0), "Dataset open succeeded"); + + VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), + "Dataset read succeeded"); + + for (i = 0; i < read_buf_size / sizeof(*read_buf); i++) + VRFY((read_buf[i] != FILL_TIME_NEVER_TEST_FILL_VAL), "Data verification succeeded"); + + if (displs) + HDfree(displs); + if (recvcounts) + HDfree(recvcounts); + if (data) + HDfree(data); + if (read_buf) + HDfree(read_buf); + if (fill_buf) + HDfree(fill_buf); + + VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded"); + VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded"); + VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded"); + VRFY((H5Gclose(group_id) >= 0), "Group close succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); return; @@ -6459,8 +8650,14 @@ test_shrinking_growing_chunks(void) int main(int argc, char **argv) { - size_t i; - hid_t file_id = -1, fapl = -1; + size_t cur_filter_idx = 0; + size_t num_filters = 0; + hid_t file_id = H5I_INVALID_HID; + hid_t fcpl_id = H5I_INVALID_HID; + hid_t group_id = H5I_INVALID_HID; + hid_t fapl_id = H5I_INVALID_HID; + hid_t dxpl_id = H5I_INVALID_HID; + hid_t dcpl_id = H5I_INVALID_HID; int mpi_code; /* Initialize MPI */ @@ -6487,7 +8684,7 @@ main(int argc, char **argv) if (MAINPROCESS) { HDprintf("==========================\n"); - HDprintf("Parallel Filters tests\n"); + HDprintf(" Parallel Filters tests\n"); HDprintf("==========================\n\n"); } @@ -6496,72 +8693,161 @@ main(int argc, char **argv) TestAlarmOn(); - /* Create test file */ - fapl = H5Pcreate(H5P_FILE_ACCESS); - VRFY((fapl >= 0), "FAPL creation succeeded"); + num_filters = ARRAY_SIZE(filterIDs); - VRFY((H5Pset_fapl_mpio(fapl, comm, info) >= 0), "Set FAPL MPIO succeeded"); + /* Set up file access property list with parallel I/O access, + * collective metadata reads/writes and the latest library + * version bounds */ + fapl_id = H5Pcreate(H5P_FILE_ACCESS); + VRFY((fapl_id >= 0), "FAPL creation succeeded"); - VRFY((H5Pset_libver_bounds(fapl, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + VRFY((H5Pset_fapl_mpio(fapl_id, comm, info) >= 0), "Set FAPL MPIO succeeded"); + VRFY((H5Pset_all_coll_metadata_ops(fapl_id, TRUE) >= 0), "H5Pset_all_coll_metadata_ops succeeded"); + VRFY((H5Pset_coll_metadata_write(fapl_id, TRUE) >= 0), "H5Pset_coll_metadata_write succeeded"); - VRFY((h5_fixname(FILENAME[0], fapl, filenames[0], sizeof(filenames[0])) != NULL), - "Test file name created"); - - file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, H5P_DEFAULT, fapl); - VRFY((file_id >= 0), "Test file creation succeeded"); - - VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) { - (*tests[i])(); - } - else { - if (MAINPROCESS) - MESG("MPI_Barrier failed"); - nerrors++; - } - } + VRFY((H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), + "Set libver bounds succeeded"); /* - * Increment the filter index to switch to the checksum filter - * and re-run the tests. + * Set up Paged and Persistent Free Space Management */ - cur_filter_idx++; - - h5_clean_files(FILENAME, fapl); + fcpl_id = H5Pcreate(H5P_FILE_CREATE); + VRFY((fcpl_id >= 0), "FCPL creation succeeded"); - fapl = H5Pcreate(H5P_FILE_ACCESS); - VRFY((fapl >= 0), "FAPL creation succeeded"); + VRFY((H5Pset_file_space_strategy(fcpl_id, H5F_FSPACE_STRATEGY_PAGE, TRUE, 1) >= 0), + "H5Pset_file_space_strategy succeeded"); - VRFY((H5Pset_fapl_mpio(fapl, comm, info) >= 0), "Set FAPL MPIO succeeded"); - - VRFY((H5Pset_libver_bounds(fapl, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0), - "Set libver bounds succeeded"); + VRFY((h5_fixname(FILENAME[0], fapl_id, filenames[0], sizeof(filenames[0])) != NULL), + "Test file name created"); - file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, H5P_DEFAULT, fapl); + file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, fcpl_id, fapl_id); VRFY((file_id >= 0), "Test file creation succeeded"); VRFY((H5Fclose(file_id) >= 0), "File close succeeded"); + file_id = H5I_INVALID_HID; - if (MAINPROCESS) { - HDprintf("\n=================================================================\n"); - HDprintf("Re-running Parallel Filters tests with Fletcher32 checksum filter\n"); - HDprintf("=================================================================\n\n"); - } - - for (i = 0; i < ARRAY_SIZE(tests); i++) { - if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) { - (*tests[i])(); - } - else { - if (MAINPROCESS) - MESG("MPI_Barrier failed"); - nerrors++; + /* Create property list for collective dataset write */ + dxpl_id = H5Pcreate(H5P_DATASET_XFER); + VRFY((dxpl_id >= 0), "DXPL creation succeeded"); + + VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded"); + + /* Create DCPL for dataset creation */ + dcpl_id = H5Pcreate(H5P_DATASET_CREATE); + VRFY((dcpl_id >= 0), "DCPL creation succeeded"); + + /* Run tests with all available filters */ + for (cur_filter_idx = 0; cur_filter_idx < num_filters; cur_filter_idx++) { + H5FD_mpio_chunk_opt_t chunk_opt; + H5Z_filter_t cur_filter = filterIDs[cur_filter_idx]; + + /* Run tests with both linked-chunk and multi-chunk I/O */ + for (chunk_opt = H5FD_MPIO_CHUNK_ONE_IO; chunk_opt <= H5FD_MPIO_CHUNK_MULTI_IO; chunk_opt++) { + H5D_alloc_time_t space_alloc_time; + + /* Run tests with all available space allocation times */ + for (space_alloc_time = H5D_ALLOC_TIME_EARLY; space_alloc_time <= H5D_ALLOC_TIME_INCR; + space_alloc_time++) { + const char *alloc_time; + unsigned filter_config; + htri_t filter_avail; + size_t i; + char group_name[512]; + + switch (space_alloc_time) { + case H5D_ALLOC_TIME_EARLY: + alloc_time = "Early"; + break; + case H5D_ALLOC_TIME_LATE: + alloc_time = "Late"; + break; + case H5D_ALLOC_TIME_INCR: + alloc_time = "Incremental"; + break; + default: + alloc_time = "Unknown"; + } + + if (MAINPROCESS) + HDprintf("== Running tests with filter '%s' using '%s' and '%s' allocation time ==\n\n", + filterNames[cur_filter_idx], + H5FD_MPIO_CHUNK_ONE_IO == chunk_opt ? "Linked-Chunk I/O" : "Multi-Chunk I/O", + alloc_time); + + /* Make sure current filter is available before testing with it */ + filter_avail = H5Zfilter_avail(cur_filter); + VRFY((filter_avail >= 0), "H5Zfilter_avail succeeded"); + + if (!filter_avail) { + if (MAINPROCESS) + HDprintf(" ** SKIPPED tests with filter '%s' - filter unavailable **\n\n", + filterNames[cur_filter_idx]); + continue; + } + + /* Get the current filter's info */ + VRFY((H5Zget_filter_info(cur_filter, &filter_config) >= 0), "H5Zget_filter_info succeeded"); + + /* Determine if filter is encode-enabled */ + if (0 == (filter_config & H5Z_FILTER_CONFIG_ENCODE_ENABLED)) { + if (MAINPROCESS) + HDprintf(" ** SKIPPED tests with filter '%s' - filter not encode-enabled **\n\n", + filterNames[cur_filter_idx]); + continue; + } + + /* Set space allocation time */ + VRFY((H5Pset_alloc_time(dcpl_id, space_alloc_time) >= 0), "H5Pset_alloc_time succeeded"); + + /* Set chunk I/O optimization method */ + VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, chunk_opt) >= 0), + "H5Pset_dxpl_mpio_chunk_opt succeeded"); + + /* Create a group to hold all the datasets for this combination + * of filter and chunk optimization mode. Then, close the file + * again since some tests may need to open the file in a special + * way, like on rank 0 only */ + file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id); + VRFY((file_id >= 0), "H5Fopen succeeded"); + + HDsnprintf(group_name, sizeof(group_name), "%s_%s_%s", filterNames[cur_filter_idx], + H5FD_MPIO_CHUNK_ONE_IO == chunk_opt ? "linked-chunk-io" : "multi-chunk-io", + alloc_time); + + group_id = H5Gcreate2(file_id, group_name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((group_id >= 0), "H5Gcreate2 succeeded"); + + VRFY((H5Gclose(group_id) >= 0), "H5Gclose failed"); + group_id = H5I_INVALID_HID; + + VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded"); + file_id = H5I_INVALID_HID; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + test_func func = tests[i]; + + if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) { + func(group_name, cur_filter, fapl_id, dcpl_id, dxpl_id); + } + else { + if (MAINPROCESS) + MESG("MPI_Barrier failed"); + nerrors++; + } + } + + if (MAINPROCESS) + HDputs(""); + } } } + VRFY((H5Pclose(dcpl_id) >= 0), "DCPL close succeeded"); + dcpl_id = H5I_INVALID_HID; + + VRFY((H5Pclose(dxpl_id) >= 0), "DXPL close succeeded"); + dxpl_id = H5I_INVALID_HID; + if (nerrors) goto exit; @@ -6575,7 +8861,21 @@ exit: TestAlarmOff(); - h5_clean_files(FILENAME, fapl); + h5_clean_files(FILENAME, fapl_id); + fapl_id = H5I_INVALID_HID; + + if (dcpl_id >= 0) + VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded"); + if (dxpl_id >= 0) + VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded"); + if (fapl_id >= 0) + VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded"); + if (fcpl_id >= 0) + VRFY((H5Pclose(fcpl_id) >= 0), "H5Pclose succeeded"); + if (group_id >= 0) + VRFY((H5Gclose(group_id) >= 0), "H5Gclose succeeded"); + if (file_id >= 0) + VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded"); H5close(); diff --git a/testpar/t_filters_parallel.h b/testpar/t_filters_parallel.h index 7eb34ed..800604c 100644 --- a/testpar/t_filters_parallel.h +++ b/testpar/t_filters_parallel.h @@ -30,23 +30,23 @@ #include "stdlib.h" #include "testpar.h" +#define ARRAY_SIZE(a) sizeof(a) / sizeof(a[0]) + /* Used to load other filters than GZIP */ /* #define DYNAMIC_FILTER */ /* Uncomment and define the fields below to use a dynamically loaded filter */ + +#ifdef DYNAMIC_FILTER #define FILTER_NUM_CDVALUES 1 const unsigned int cd_values[FILTER_NUM_CDVALUES] = {0}; -H5Z_filter_t filter_id; -unsigned int flags = 0; -size_t cd_nelmts = FILTER_NUM_CDVALUES; - -/* Utility Macros */ -#define STRINGIFY(type) #type +unsigned int flags = 0; +size_t cd_nelmts = FILTER_NUM_CDVALUES; +#endif /* Common defines for all tests */ -#define C_DATATYPE long -#define C_DATATYPE_MPI MPI_LONG -#define COMPOUND_C_DATATYPE cmpd_filtered_t -#define C_DATATYPE_STR(type) STRINGIFY(type) -#define HDF5_DATATYPE_NAME H5T_NATIVE_LONG +#define C_DATATYPE long +#define C_DATATYPE_MPI MPI_LONG +#define COMPOUND_C_DATATYPE cmpd_filtered_t +#define HDF5_DATATYPE_NAME H5T_NATIVE_LONG /* Macro used to generate data for datasets for later verification */ #define GEN_DATA(i) INCREMENTAL_DATA(i) @@ -59,7 +59,7 @@ size_t cd_nelmts = FILTER_NUM_CDVALUES; #define RANK_DATA(i) \ (mpi_rank) /* Generates test data to visibly show which rank wrote to which parts of the dataset */ -#define DEFAULT_DEFLATE_LEVEL 6 +#define DEFAULT_DEFLATE_LEVEL 9 #define DIM0_SCALE_FACTOR 4 #define DIM1_SCALE_FACTOR 2 @@ -89,6 +89,14 @@ typedef struct { #define WRITE_UNSHARED_FILTERED_CHUNKS_CH_NROWS (WRITE_UNSHARED_FILTERED_CHUNKS_NROWS / mpi_size) #define WRITE_UNSHARED_FILTERED_CHUNKS_CH_NCOLS (WRITE_UNSHARED_FILTERED_CHUNKS_NCOLS / mpi_size) +/* Defines for the unshared filtered chunks partial write test */ +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME "unshared_filtered_chunks_partial_write" +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS 2 +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NROWS (mpi_size * DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS (mpi_size * DIM1_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS (DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS (DIM1_SCALE_FACTOR) + /* Defines for the shared filtered chunks write test */ #define WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME "shared_filtered_chunks_write" #define WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS 2 @@ -97,6 +105,42 @@ typedef struct { #define WRITE_SHARED_FILTERED_CHUNKS_NROWS (WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS * DIM0_SCALE_FACTOR) #define WRITE_SHARED_FILTERED_CHUNKS_NCOLS (WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS * DIM1_SCALE_FACTOR) +/* Defines for the unshared filtered chunks w/ single unlim. dimension write test */ +#define WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME "unshared_filtered_chunks_single_unlim_dim_write" +#define WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS 2 +#define WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS (mpi_size * DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS (mpi_size * DIM1_SCALE_FACTOR) +#define WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS (WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS / mpi_size) +#define WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS (WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS / mpi_size) +#define WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS 5 + +/* Defines for the shared filtered chunks w/ single unlim. dimension write test */ +#define WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME "shared_filtered_chunks_single_unlim_dim_write" +#define WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS 2 +#define WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS (mpi_size) +#define WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS (mpi_size) +#define WRITE_SHARED_ONE_UNLIM_DIM_NROWS (WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS * DIM0_SCALE_FACTOR) +#define WRITE_SHARED_ONE_UNLIM_DIM_NCOLS (WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS * DIM1_SCALE_FACTOR) +#define WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS 5 + +/* Defines for the unshared filtered chunks w/ two unlim. dimension write test */ +#define WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME "unshared_filtered_chunks_two_unlim_dim_write" +#define WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS 2 +#define WRITE_UNSHARED_TWO_UNLIM_DIM_NROWS (mpi_size * DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS (DIM1_SCALE_FACTOR) +#define WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS (DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS (DIM1_SCALE_FACTOR) +#define WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS 5 + +/* Defines for the shared filtered chunks w/ two unlim. dimension write test */ +#define WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME "shared_filtered_chunks_two_unlim_dim_write" +#define WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS 2 +#define WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS (mpi_size) +#define WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS (mpi_size) +#define WRITE_SHARED_TWO_UNLIM_DIM_NROWS (mpi_size) +#define WRITE_SHARED_TWO_UNLIM_DIM_NCOLS (mpi_size) +#define WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS 5 + /* Defines for the filtered chunks write test where a process has no selection */ #define WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME "single_no_selection_filtered_chunks_write" #define WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS 2 @@ -403,4 +447,53 @@ typedef struct { #define SHRINKING_GROWING_CHUNKS_CH_NCOLS (SHRINKING_GROWING_CHUNKS_NCOLS / mpi_size) #define SHRINKING_GROWING_CHUNKS_NLOOPS 20 +/* Defines for the unshared filtered edge chunks write test */ +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME "unshared_filtered_edge_chunks_write" +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2 "unshared_filtered_edge_chunks_no_filter_write" +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS 2 +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS (DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS (DIM1_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NROWS (mpi_size * DIM0_SCALE_FACTOR) +#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS \ + (mpi_size * DIM1_SCALE_FACTOR) + (WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS - 1) + +/* Defines for the shared filtered edge chunks write test */ +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME "shared_filtered_edge_chunks_write" +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2 "shared_filtered_edge_chunks_no_filter_write" +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS 2 +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS (mpi_size) +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS (mpi_size) +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS \ + (WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS * DIM0_SCALE_FACTOR) +#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS \ + ((WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS * DIM1_SCALE_FACTOR) + \ + (WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS - 1)) + +/* Defines for the fill values test */ +#define FILL_VALUES_TEST_DATASET_NAME "fill_value_test" +#define FILL_VALUES_TEST_DATASET_NAME2 "fill_value_alloc_test" +#define FILL_VALUES_TEST_DATASET_DIMS 2 +#define FILL_VALUES_TEST_FILL_VAL (-1) +#define FILL_VALUES_TEST_CH_NROWS (mpi_size) +#define FILL_VALUES_TEST_CH_NCOLS (mpi_size + 1) +#define FILL_VALUES_TEST_NROWS (FILL_VALUES_TEST_CH_NROWS * DIM0_SCALE_FACTOR) +#define FILL_VALUES_TEST_NCOLS (FILL_VALUES_TEST_CH_NCOLS * DIM1_SCALE_FACTOR) + +/* Defines for the undefined fill value test */ +#define FILL_VALUE_UNDEFINED_TEST_DATASET_NAME "fill_value_undefined_test" +#define FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS 2 +#define FILL_VALUE_UNDEFINED_TEST_CH_NROWS (mpi_size) +#define FILL_VALUE_UNDEFINED_TEST_CH_NCOLS (mpi_size + 1) +#define FILL_VALUE_UNDEFINED_TEST_NROWS (FILL_VALUE_UNDEFINED_TEST_CH_NROWS * DIM0_SCALE_FACTOR) +#define FILL_VALUE_UNDEFINED_TEST_NCOLS (FILL_VALUE_UNDEFINED_TEST_CH_NCOLS * DIM1_SCALE_FACTOR) + +/* Defines for the fill time of 'never' test */ +#define FILL_TIME_NEVER_TEST_DATASET_NAME "fill_time_never_test" +#define FILL_TIME_NEVER_TEST_DATASET_DIMS 2 +#define FILL_TIME_NEVER_TEST_FILL_VAL (-1) +#define FILL_TIME_NEVER_TEST_CH_NROWS (mpi_size) +#define FILL_TIME_NEVER_TEST_CH_NCOLS (mpi_size + 1) +#define FILL_TIME_NEVER_TEST_NROWS (FILL_TIME_NEVER_TEST_CH_NROWS * DIM0_SCALE_FACTOR) +#define FILL_TIME_NEVER_TEST_NCOLS (FILL_TIME_NEVER_TEST_CH_NCOLS * DIM1_SCALE_FACTOR) + #endif /* TEST_PARALLEL_FILTERS_H_ */ diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h index c692287..16f45d3 100644 --- a/testpar/testphdf5.h +++ b/testpar/testphdf5.h @@ -186,10 +186,6 @@ enum H5TEST_COLL_CHUNK_API { #define TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES 0x010 #define TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT 0x020 #define TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL 0x040 -#define TEST_FILTERS 0x080 -/* TEST_FILTERS will take place of this after supporting mpio + filter for - * H5Dcreate and H5Dwrite */ -#define TEST_FILTERS_READ 0x100 /* Don't erase these lines, they are put here for debugging purposes */ /* |