-rw-r--r--  CMakeLists.txt                           27
-rw-r--r--  config/cmake/libhdf5.settings.cmake.in   36
-rw-r--r--  configure.ac                             63
-rw-r--r--  src/H5Dio.c                               3
-rw-r--r--  src/H5Dmpio.c                            24
-rw-r--r--  src/H5Ppublic.h                           3
-rw-r--r--  src/H5Smpio.c                            35
-rw-r--r--  src/libhdf5.settings.in                  36
-rw-r--r--  testpar/t_filters_parallel.c             14
9 files changed, 199 insertions, 42 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6a6d708..eb860d2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -555,10 +555,31 @@ if (HDF5_ENABLE_PARALLEL)
   set (H5_HAVE_PARALLEL 1)
   # MPI checks, only do these if MPI_C_FOUND is true, otherwise they always fail
   # and once set, they are cached as false and not regenerated
-  set (CMAKE_REQUIRED_LIBRARIES "${MPI_C_LIBRARIES}" )
+  set (CMAKE_REQUIRED_LIBRARIES "${MPI_C_LIBRARIES}")
+  set (CMAKE_REQUIRED_INCLUDES "${MPI_C_INCLUDE_DIRS}")
   # Used by Fortran + MPI
-  CHECK_SYMBOL_EXISTS (MPI_Comm_c2f "${MPI_C_INCLUDE_DIRS}/mpi.h" H5_HAVE_MPI_MULTI_LANG_Comm)
-  CHECK_SYMBOL_EXISTS (MPI_Info_c2f "${MPI_C_INCLUDE_DIRS}/mpi.h" H5_HAVE_MPI_MULTI_LANG_Info)
+  CHECK_SYMBOL_EXISTS (MPI_Comm_c2f "mpi.h" H5_HAVE_MPI_MULTI_LANG_Comm)
+  CHECK_SYMBOL_EXISTS (MPI_Info_c2f "mpi.h" H5_HAVE_MPI_MULTI_LANG_Info)
+
+  # Used by Parallel Compression feature
+  set (PARALLEL_FILTERED_WRITES ON)
+  CHECK_SYMBOL_EXISTS (MPI_Mprobe "mpi.h" H5_HAVE_MPI_Mprobe)
+  CHECK_SYMBOL_EXISTS (MPI_Imrecv "mpi.h" H5_HAVE_MPI_Imrecv)
+  if (NOT H5_HAVE_MPI_Mprobe OR NOT H5_HAVE_MPI_Imrecv)
+    message (WARNING "The MPI_Mprobe and/or MPI_Imrecv functions could not be located.
+             Parallel writes of filtered data will be disabled.")
+    set (PARALLEL_FILTERED_WRITES OFF)
+  endif ()
+
+  # Used by big I/O feature
+  set (LARGE_PARALLEL_IO ON)
+  CHECK_SYMBOL_EXISTS (MPI_Get_elements_x "mpi.h" H5_HAVE_MPI_Get_elements_x)
+  CHECK_SYMBOL_EXISTS (MPI_Type_size_x "mpi.h" H5_HAVE_MPI_Type_size_x)
+  if (NOT H5_HAVE_MPI_Get_elements_x OR NOT H5_HAVE_MPI_Type_size_x)
+    message (WARNING "The MPI_Get_elements_x and/or MPI_Type_size_x functions could not be located.
+             Reading/Writing >2GB of data in a single parallel I/O operation will be disabled.")
+    set (LARGE_PARALLEL_IO OFF)
+  endif ()
 else ()
   message (STATUS "Parallel libraries not found")
 endif ()
diff --git a/config/cmake/libhdf5.settings.cmake.in b/config/cmake/libhdf5.settings.cmake.in
index 891e3a7..6a489e7 100644
--- a/config/cmake/libhdf5.settings.cmake.in
+++ b/config/cmake/libhdf5.settings.cmake.in
@@ -64,20 +64,22 @@ Languages:
 
 Features:
 ---------
-                  Parallel HDF5: @HDF5_ENABLE_PARALLEL@
-             High-level library: @HDF5_BUILD_HL_LIB@
-                   Threadsafety: @HDF5_ENABLE_THREADSAFE@
-            Default API mapping: @DEFAULT_API_VERSION@
- With deprecated public symbols: @HDF5_ENABLE_DEPRECATED_SYMBOLS@
-         I/O filters (external): @EXTERNAL_FILTERS@
-                            MPE: @H5_HAVE_LIBLMPE@
-                     Direct VFD: @H5_HAVE_DIRECT@
-                        dmalloc: @H5_HAVE_LIBDMALLOC@
- Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@
-                    API Tracing: @HDF5_ENABLE_TRACE@
-           Using memory checker: @HDF5_ENABLE_USING_MEMCHECKER@
-Memory allocation sanity checks: @HDF5_MEMORY_ALLOC_SANITY_CHECK@
-            Metadata trace file: @METADATATRACEFILE@
-         Function Stack Tracing: @HDF5_ENABLE_CODESTACK@
-      Strict File Format Checks: @HDF5_STRICT_FORMAT_CHECKS@
-   Optimization Instrumentation: @HDF5_Enable_Instrument@
+                   Parallel HDF5: @HDF5_ENABLE_PARALLEL@
+Parallel Filtered Dataset Writes: @PARALLEL_FILTERED_WRITES@
+              Large Parallel I/O: @LARGE_PARALLEL_IO@
+              High-level library: @HDF5_BUILD_HL_LIB@
+                    Threadsafety: @HDF5_ENABLE_THREADSAFE@
+             Default API mapping: @DEFAULT_API_VERSION@
+  With deprecated public symbols: @HDF5_ENABLE_DEPRECATED_SYMBOLS@
+          I/O filters (external): @EXTERNAL_FILTERS@
+                             MPE: @H5_HAVE_LIBLMPE@
+                      Direct VFD: @H5_HAVE_DIRECT@
+                         dmalloc: @H5_HAVE_LIBDMALLOC@
+  Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@
+                     API Tracing: @HDF5_ENABLE_TRACE@
+            Using memory checker: @HDF5_ENABLE_USING_MEMCHECKER@
+ Memory allocation sanity checks: @HDF5_MEMORY_ALLOC_SANITY_CHECK@
+             Metadata trace file: @METADATATRACEFILE@
+          Function Stack Tracing: @HDF5_ENABLE_CODESTACK@
+       Strict File Format Checks: @HDF5_STRICT_FORMAT_CHECKS@
+    Optimization Instrumentation: @HDF5_Enable_Instrument@
diff --git a/configure.ac b/configure.ac
index 9a8f961..cbc7e01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2532,6 +2532,8 @@ esac
 AC_SUBST([ADD_PARALLEL_FILES]) ADD_PARALLEL_FILES="no"
 AC_SUBST([MPE]) MPE=no
 AC_SUBST([INSTRUMENT_LIBRARY]) INSTRUMENT_LIBRARY=no
+AC_SUBST([PARALLEL_FILTERED_WRITES])
+AC_SUBST([LARGE_PARALLEL_IO])
 
 if test -n "$PARALLEL"; then
     ## The 'testpar' directory should participate in the build
@@ -2687,6 +2689,61 @@ if test -n "$PARALLEL"; then
     if test "X-$MPE" = "X-yes"; then
         AC_DEFINE([HAVE_MPE], [1], [Define if we have MPE support])
     fi
+
+    ## ----------------------------------------------------------------------
+    ## Check for the MPI-3 functions necessary for the Parallel Compression
+    ## feature. If these are not present, issue a warning that Parallel
+    ## Compression will be disabled.
+    ##
+    AC_MSG_CHECKING([for MPI_Mprobe and MPI_Imrecv functions])
+
+    AC_LINK_IFELSE(
+        [AC_LANG_PROGRAM(
+            [[
+                #include <mpi.h>
+            ]],
+            [[
+                MPI_Message message;
+                MPI_Init(0, (void *) 0);
+                MPI_Mprobe(0, 0, 0, &message, (void *) 0);
+                MPI_Imrecv((void *) 0, 0, 0, (void *) 0, (void *) 0);
+            ]]
+        )],
+        [AC_MSG_RESULT([yes])
+         PARALLEL_FILTERED_WRITES=yes],
+        [AC_MSG_RESULT([no])
+         AC_MSG_WARN([A simple MPI program using the MPI_Mprobe and MPI_Imrecv functions could not be compiled and linked.
+                      Parallel writes of filtered data will be disabled.])
+         PARALLEL_FILTERED_WRITES=no]
+    )
+
+    ## ----------------------------------------------------------------------
+    ## Check for the MPI-3 functions necessary for the big I/O feature.
+    ## If these are not present, issue a warning that the big I/O feature
+    ## will be disabled.
+    ##
+    AC_MSG_CHECKING([for MPI_Get_elements_x and MPI_Type_size_x functions])
+
+    AC_LINK_IFELSE(
+        [AC_LANG_PROGRAM(
+            [[
+                #include <mpi.h>
+            ]],
+            [[
+                MPI_Count count;
+                MPI_Init(0, (void *) 0);
+                MPI_Get_elements_x(0, 0, &count);
+                MPI_Type_size_x(0, &count);
+            ]]
+        )],
+        [AC_MSG_RESULT([yes])
+         LARGE_PARALLEL_IO=yes],
+        [AC_MSG_RESULT([no])
+         AC_MSG_WARN([A simple MPI program using the MPI_Get_elements_x and MPI_Type_size_x functions could not be compiled and linked.
+                      Reading/Writing >2GB of data in a single parallel I/O operation will be disabled.])
+         LARGE_PARALLEL_IO=no]
+    )
+
 fi
 
 ## ----------------------------------------------------------------------
@@ -2977,6 +3034,12 @@ AC_SUBST([WORDS_BIGENDIAN])
 ## Parallel support? (set above except empty if none)
 PARALLEL=${PARALLEL:-no}
 
+## Parallel writes to filtered datasets support?
+PARALLEL_FILTERED_WRITES=${PARALLEL_FILTERED_WRITES:-no}
+
+## >2GB writes in parallel support?
+LARGE_PARALLEL_IO=${LARGE_PARALLEL_IO:-no}
+
 ## Compiler with version information. This consists of the full path
 ## name of the compiler and the reported version number.
 AC_SUBST([CC_VERSION])
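For anyone checking a toolchain by hand, the standalone program below exercises the same four MPI-3 routines the CMake probes above look for. It is an illustrative sketch, not part of the patch; if it compiles, links, and runs under the target mpicc, both of the new feature checks should come back positive.

    /* mpi3_probe.c -- hypothetical by-hand check, not part of this patch.
     * Build and run: mpicc mpi3_probe.c -o mpi3_probe && mpiexec -n 1 ./mpi3_probe */
    #include <mpi.h>
    #include <stdio.h>

    int main(int argc, char **argv)
    {
        MPI_Message msg;
        MPI_Status  status;
        MPI_Request sreq, rreq;
        MPI_Count   count;
        int         val = 42, out = 0;

        MPI_Init(&argc, &argv);

        /* Big I/O: 64-bit datatype size query */
        MPI_Type_size_x(MPI_INT, &count);
        printf("MPI_Type_size_x(MPI_INT) = %lld\n", (long long)count);

        /* Parallel Compression: matched probe + nonblocking matched receive
         * of a message this rank sends to itself */
        MPI_Isend(&val, 1, MPI_INT, 0, 0, MPI_COMM_SELF, &sreq);
        MPI_Mprobe(0, 0, MPI_COMM_SELF, &msg, &status);
        MPI_Imrecv(&out, 1, MPI_INT, &msg, &rreq);
        MPI_Wait(&rreq, &status);
        MPI_Wait(&sreq, MPI_STATUS_IGNORE);

        /* Big I/O: 64-bit element count from a receive status */
        MPI_Get_elements_x(&status, MPI_INT, &count);
        printf("received %d (%lld element(s))\n", out, (long long)count);

        MPI_Finalize();
        return 0;
    }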
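A note on why these particular functions matter: the shared-chunk code in H5Dmpio.c has to receive chunk-modification messages whose sizes are not known in advance, and an MPI-3 matched probe lets the receiver size its buffer from the probed message and then receive exactly that message. A minimal sketch of the pattern, assuming a caller-chosen tag (the helper below is ours, not HDF5 API):

    /* Receive a message of unknown size via an MPI-3 matched probe. */
    #include <mpi.h>
    #include <stdlib.h>

    static void *recv_unknown_size(MPI_Comm comm, int tag, int *out_count)
    {
        MPI_Message msg;
        MPI_Status  status;
        void       *buf;

        /* Match an incoming message first... */
        MPI_Mprobe(MPI_ANY_SOURCE, tag, comm, &msg, &status);

        /* ...size the buffer from the matched message... */
        MPI_Get_count(&status, MPI_BYTE, out_count);
        if (NULL == (buf = malloc((size_t)*out_count)))
            return NULL;

        /* ...then receive that message and no other. HDF5's code uses the
         * nonblocking MPI_Imrecv; MPI_Mrecv is the blocking counterpart. */
        MPI_Mrecv(buf, *out_count, MPI_BYTE, &msg, MPI_STATUS_IGNORE);

        return buf;
    }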
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 452105e..5fea91f 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -1189,7 +1189,8 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset,
         "data transforms needed to be applied",
         "optimized MPI types flag wasn't set",
         "one of the dataspaces was neither simple nor scalar",
-        "dataset was not contiguous or chunked" };
+        "dataset was not contiguous or chunked",
+        "parallel writes to filtered datasets are disabled" };
 
     if(H5CX_get_mpio_local_no_coll_cause(&local_no_collective_cause) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to get local no collective cause value")
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index f782880..9bffd8b 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -229,9 +229,11 @@ static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info,
 static herr_t H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info,
     const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm,
     H5D_filtered_collective_io_info_t **chunk_list, size_t *num_entries);
+#if MPI_VERSION >= 3
 static herr_t H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info,
     const H5D_type_info_t *type_info, const H5D_chunk_map_t *fm,
     H5D_filtered_collective_io_info_t *local_chunk_array, size_t *local_chunk_array_num_entries);
+#endif
 static herr_t H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
     size_t array_entry_size, void **gathered_array, size_t *gathered_array_num_entries,
     hbool_t allgather, int root, MPI_Comm comm, int (*sort_func)(const void *, const void *));
@@ -244,8 +246,10 @@ static herr_t H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io
 static int H5D__cmp_chunk_addr(const void *chunk_addr_info1, const void *chunk_addr_info2);
 static int H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_info_entry1,
     const void *filtered_collective_io_info_entry2);
+#if MPI_VERSION >= 3
 static int H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective_io_info_entry1,
     const void *filtered_collective_io_info_entry2);
+#endif
 
 
 /*********************/
@@ -330,6 +334,18 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
      * use collective IO will defer until each chunk IO is reached.
      */
 
+#if MPI_VERSION < 3
+    /*
+     * Don't allow parallel writes to filtered datasets if the MPI version
+     * is less than 3. The functions needed (MPI_Mprobe and MPI_Imrecv) will
+     * not be available.
+     */
+    if (io_info->op_type == H5D_IO_OP_WRITE &&
+            io_info->dset->shared->layout.type == H5D_CHUNKED &&
+            io_info->dset->shared->dcpl_cache.pline.nused > 0)
+        local_cause |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
+#endif
+
     /* Check for independent I/O */
     if(local_cause & H5D_MPIO_SET_INDEPENDENT)
         global_cause = local_cause;
@@ -2127,6 +2143,7 @@ H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_in
     FUNC_LEAVE_NOAPI(H5F_addr_cmp(addr1, addr2))
 } /* end H5D__cmp_filtered_collective_io_info_entry() */
 
+#if MPI_VERSION >= 3
 
 /*-------------------------------------------------------------------------
  * Function:    H5D__cmp_filtered_collective_io_info_entry_owner
@@ -2157,6 +2174,7 @@ H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective
     FUNC_LEAVE_NOAPI(owner1 - owner2)
 } /* end H5D__cmp_filtered_collective_io_info_entry_owner() */
+#endif
 
 
 /*-------------------------------------------------------------------------
@@ -2585,8 +2603,12 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
         /* Redistribute shared chunks to new owners as necessary */
         if (io_info->op_type == H5D_IO_OP_WRITE)
+#if MPI_VERSION >= 3
             if (H5D__chunk_redistribute_shared_chunks(io_info, type_info, fm, local_info_array, &num_chunks_selected) < 0)
                 HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks")
+#else
+            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks - MPI version < 3 (MPI_Mprobe and MPI_Imrecv missing)")
+#endif
 
     *chunk_list = local_info_array;
     *num_entries = num_chunks_selected;
@@ -2595,6 +2617,7 @@ done:
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__construct_filtered_io_info_list() */
 
+#if MPI_VERSION >= 3
 
 /*-------------------------------------------------------------------------
  * Function:    H5D__chunk_redistribute_shared_chunks
@@ -2897,6 +2920,7 @@ done:
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__chunk_redistribute_shared_chunks() */
+#endif
 
 
 /*-------------------------------------------------------------------------
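Applications can observe the new cause value at runtime: after requesting collective I/O on a transfer property list, H5Pget_mpio_no_collective_cause() reports why collective access was broken, and on a pre-MPI-3 build a filtered write will now report the new bit. A hedged sketch (the surrounding dxpl setup is assumed, not shown):

    /* Sketch: dxpl_id is a transfer plist that had
     * H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) set and was then
     * passed to H5Dwrite() on a filtered (e.g. deflate) dataset. */
    #include <hdf5.h>
    #include <stdint.h>
    #include <stdio.h>

    static void report_no_collective_cause(hid_t dxpl_id)
    {
        uint32_t local_cause = 0, global_cause = 0;

        if (H5Pget_mpio_no_collective_cause(dxpl_id, &local_cause, &global_cause) < 0)
            return; /* error handling elided */

        if (global_cause & H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED)
            fprintf(stderr, "collective I/O was broken for this write: the library "
                    "was built against an MPI implementation without MPI_Mprobe/"
                    "MPI_Imrecv, so parallel writes to filtered datasets are disabled\n");
    }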
diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h
index 7657283..9f7fdab 100644
--- a/src/H5Ppublic.h
+++ b/src/H5Ppublic.h
@@ -164,7 +164,8 @@ typedef enum H5D_mpio_no_collective_cause_t {
     H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED = 0x08,
     H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES = 0x10,
     H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET = 0x20,
-    H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE = 0x40
+    H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED = 0x40,
+    H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE = 0x80
 } H5D_mpio_no_collective_cause_t;
 
 /********************/
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index db81ffc..2bd275a 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -271,29 +271,58 @@ H5S_mpio_create_point_datatype (size_t elmt_size, hsize_t num_points,
     if(NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types)))
         HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
 
+#if MPI_VERSION < 3
+        /* Allocate block sizes for MPI datatype call */
+        if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * bigio_count)))
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
+
+        for(u = 0; u < bigio_count; u++)
+            blocks[u] = 1;
+#endif
+
         for(i=0 ; i<num_big_types ; i++) {
+#if MPI_VERSION >= 3
             if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(bigio_count,
                                                                          1,
                                                                          &disp[i*bigio_count],
                                                                          elmt_type,
                                                                          &inner_types[i]))) {
-              HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
             }
+#else
+            if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)bigio_count,
+                                                                   blocks,
+                                                                   &disp[i*bigio_count],
+                                                                   elmt_type,
+                                                                   &inner_types[i]))) {
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+            }
+#endif
             inner_blocks[i] = 1;
             inner_disps[i] = 0;
         }
 
         if(remaining_points) {
+#if MPI_VERSION >= 3
             if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(remaining_points,
                                                                          1,
                                                                          &disp[num_big_types*bigio_count],
                                                                          elmt_type,
                                                                          &inner_types[num_big_types]))) {
                 HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
-              }
+            }
+#else
+            if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)remaining_points,
+                                                                   blocks,
+                                                                   &disp[num_big_types*bigio_count],
+                                                                   elmt_type,
+                                                                   &inner_types[num_big_types]))) {
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+            }
+#endif
             inner_blocks[num_big_types] = 1;
             inner_disps[num_big_types] = 0;
-        }
+        }
 
         if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types,
                                                              inner_blocks,
diff --git a/src/libhdf5.settings.in b/src/libhdf5.settings.in
index 51b24dc..531cd00 100644
--- a/src/libhdf5.settings.in
+++ b/src/libhdf5.settings.in
@@ -67,20 +67,22 @@ Languages:
 
 Features:
 ---------
-                  Parallel HDF5: @PARALLEL@
-             High-level library: @HDF5_HL@
-                   Threadsafety: @THREADSAFE@
-            Default API mapping: @DEFAULT_API_VERSION@
- With deprecated public symbols: @DEPRECATED_SYMBOLS@
-         I/O filters (external): @EXTERNAL_FILTERS@
-                            MPE: @MPE@
-                     Direct VFD: @DIRECT_VFD@
-                        dmalloc: @HAVE_DMALLOC@
- Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@
-                    API tracing: @TRACE_API@
-           Using memory checker: @USINGMEMCHECKER@
-Memory allocation sanity checks: @MEMORYALLOCSANITYCHECK@
-            Metadata trace file: @METADATATRACEFILE@
-         Function stack tracing: @CODESTACK@
-      Strict file format checks: @STRICT_FORMAT_CHECKS@
-   Optimization instrumentation: @INSTRUMENT_LIBRARY@
+                   Parallel HDF5: @PARALLEL@
+Parallel Filtered Dataset Writes: @PARALLEL_FILTERED_WRITES@
+              Large Parallel I/O: @LARGE_PARALLEL_IO@
+              High-level library: @HDF5_HL@
+                    Threadsafety: @THREADSAFE@
+             Default API mapping: @DEFAULT_API_VERSION@
+  With deprecated public symbols: @DEPRECATED_SYMBOLS@
+          I/O filters (external): @EXTERNAL_FILTERS@
+                             MPE: @MPE@
+                      Direct VFD: @DIRECT_VFD@
+                         dmalloc: @HAVE_DMALLOC@
+  Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@
+                     API tracing: @TRACE_API@
+            Using memory checker: @USINGMEMCHECKER@
+ Memory allocation sanity checks: @MEMORYALLOCSANITYCHECK@
+             Metadata trace file: @METADATATRACEFILE@
+          Function stack tracing: @CODESTACK@
+       Strict file format checks: @STRICT_FORMAT_CHECKS@
+    Optimization instrumentation: @INSTRUMENT_LIBRARY@
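The H5Smpio.c change above is behavior-preserving: MPI_Type_create_hindexed_block(n, 1, disps, ...) describes the same type map as MPI_Type_create_hindexed(n, blocks, disps, ...) when every blocks[i] is 1, which is why the pre-MPI-3 path allocates an array of ones. A standalone sketch of the two paths (the helper name is ours):

    /* make_point_type: one displacement per point, block length 1 each. */
    #include <mpi.h>
    #include <stdlib.h>

    static int make_point_type(int n, MPI_Aint *disps, MPI_Datatype elmt,
                               MPI_Datatype *out)
    {
        int mpi_code;
    #if MPI_VERSION >= 3
        /* MPI-3: the constant block length is passed directly */
        mpi_code = MPI_Type_create_hindexed_block(n, 1, disps, elmt, out);
    #else
        /* Pre-MPI-3 fallback: spell the constant block lengths out */
        int *blocks, i;

        if (NULL == (blocks = (int *)malloc(sizeof(int) * (size_t)n)))
            return MPI_ERR_NO_MEM;
        for (i = 0; i < n; i++)
            blocks[i] = 1;
        mpi_code = MPI_Type_create_hindexed(n, blocks, disps, elmt, out);
        free(blocks);
    #endif
        return mpi_code;
    }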
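For context on what the new test file below exercises: with an MPI-3 build, each rank can write its own chunks of a compressed dataset through a collective transfer. A minimal sketch of such a write (file name, dataset name, and sizes are ours; error checking elided):

    /* parallel_deflate_write.c -- illustrative sketch, not HDF5 test code.
     * Run with: mpiexec -n N ./parallel_deflate_write */
    #include <hdf5.h>
    #include <mpi.h>

    #define CHUNK 1024

    int main(int argc, char **argv)
    {
        hid_t   fapl, file, fspace, mspace, dcpl, dxpl, dset;
        hsize_t dims[1], chunk[1] = {CHUNK}, start[1], count[1] = {CHUNK};
        int     buf[CHUNK], mpi_rank, mpi_size, i;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
        MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

        fapl = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);
        file = H5Fcreate("filtered.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl);

        /* one deflate-compressed chunk per rank */
        dims[0] = (hsize_t)mpi_size * CHUNK;
        fspace  = H5Screate_simple(1, dims, NULL);
        dcpl    = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_chunk(dcpl, 1, chunk);
        H5Pset_deflate(dcpl, 6);
        dset = H5Dcreate2(file, "data", H5T_NATIVE_INT, fspace,
                          H5P_DEFAULT, dcpl, H5P_DEFAULT);

        /* each rank selects its own chunk; filtered writes must be collective */
        start[0] = (hsize_t)mpi_rank * CHUNK;
        H5Sselect_hyperslab(fspace, H5S_SELECT_SET, start, NULL, count, NULL);
        mspace = H5Screate_simple(1, count, NULL);
        dxpl   = H5Pcreate(H5P_DATASET_XFER);
        H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);

        for (i = 0; i < CHUNK; i++)
            buf[i] = mpi_rank;
        H5Dwrite(dset, H5T_NATIVE_INT, mspace, fspace, dxpl, buf);

        H5Pclose(dxpl); H5Sclose(mspace); H5Dclose(dset); H5Pclose(dcpl);
        H5Sclose(fspace); H5Fclose(file); H5Pclose(fapl);
        MPI_Finalize();
        return 0;
    }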
diff --git a/testpar/t_filters_parallel.c b/testpar/t_filters_parallel.c
index 3647732..f436c8f 100644
--- a/testpar/t_filters_parallel.c
+++ b/testpar/t_filters_parallel.c
@@ -37,6 +37,7 @@ size_t cur_filter_idx = 0;
 
 static herr_t set_dcpl_filter(hid_t dcpl);
 
+#if MPI_VERSION >= 3
 /* Tests for writing data in parallel */
 static void test_write_one_chunk_filtered_dataset(void);
 static void test_write_filtered_dataset_no_overlap(void);
@@ -52,6 +53,7 @@ static void test_write_cmpd_filtered_dataset_no_conversion_unshared(void);
 static void test_write_cmpd_filtered_dataset_no_conversion_shared(void);
 static void test_write_cmpd_filtered_dataset_type_conversion_unshared(void);
 static void test_write_cmpd_filtered_dataset_type_conversion_shared(void);
+#endif
 
 /* Tests for reading data in parallel */
 static void test_read_one_chunk_filtered_dataset(void);
@@ -69,8 +71,10 @@ static void test_read_cmpd_filtered_dataset_no_conversion_shared(void);
 static void test_read_cmpd_filtered_dataset_type_conversion_unshared(void);
 static void test_read_cmpd_filtered_dataset_type_conversion_shared(void);
 
+#if MPI_VERSION >= 3
 /* Other miscellaneous tests */
 static void test_shrinking_growing_chunks(void);
+#endif
 
 /*
  * Tests for attempting to round-trip the data going from
@@ -82,7 +86,9 @@ static void test_shrinking_growing_chunks(void);
  * written in parallel -> read serially
  */
 static void test_write_serial_read_parallel(void);
+#if MPI_VERSION >= 3
 static void test_write_parallel_read_serial(void);
+#endif
 
 static MPI_Comm comm = MPI_COMM_WORLD;
 static MPI_Info info = MPI_INFO_NULL;
@@ -90,6 +96,7 @@ static int mpi_rank;
 static int mpi_size;
 
 static void (*tests[])(void) = {
+#if MPI_VERSION >= 3
     test_write_one_chunk_filtered_dataset,
     test_write_filtered_dataset_no_overlap,
     test_write_filtered_dataset_overlap,
@@ -104,6 +111,7 @@ static void (*tests[])(void) = {
     test_write_cmpd_filtered_dataset_no_conversion_shared,
     test_write_cmpd_filtered_dataset_type_conversion_unshared,
     test_write_cmpd_filtered_dataset_type_conversion_shared,
+#endif
     test_read_one_chunk_filtered_dataset,
     test_read_filtered_dataset_no_overlap,
     test_read_filtered_dataset_overlap,
@@ -119,8 +127,10 @@ static void (*tests[])(void) = {
     test_read_cmpd_filtered_dataset_type_conversion_unshared,
     test_read_cmpd_filtered_dataset_type_conversion_shared,
     test_write_serial_read_parallel,
+#if MPI_VERSION >= 3
     test_write_parallel_read_serial,
     test_shrinking_growing_chunks,
+#endif
 };
 
 /*
@@ -143,6 +153,7 @@ set_dcpl_filter(hid_t dcpl)
     }
 }
 
+#if MPI_VERSION >= 3
 /*
  * Tests parallel write of filtered data in the special
 * case where a dataset is composed of a single chunk.
@@ -2458,6 +2469,7 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void)
 
     return;
 }
+#endif
 
 /*
 * Tests parallel read of filtered data in the special
@@ -5528,6 +5540,7 @@ test_write_serial_read_parallel(void)
 
     return;
 }
 
+#if MPI_VERSION >= 3
 /*
  * Tests parallel write of filtered data
  * to a dataset. After the write has
@@ -5839,6 +5852,7 @@ test_shrinking_growing_chunks(void)
 
     return;
 }
+#endif
 
 int
 main(int argc, char** argv)