From 136503bcc1d817df4ab7dc4408a54e857048abe3 Mon Sep 17 00:00:00 2001 From: Dana Robinson Date: Thu, 3 Sep 2020 08:58:57 -0700 Subject: Merges Quincey's parallel collective MD fix from 1.10 --- release_docs/RELEASE.txt | 6 +++ src/H5Cdbg.c | 107 +---------------------------------------------- src/H5Cmpio.c | 75 ++++++++++----------------------- src/H5Cprivate.h | 5 +-- src/H5FDmpi.c | 36 ---------------- src/H5FDmpio.c | 4 +- src/H5FDprivate.h | 2 - src/H5Fmpi.c | 28 ------------- src/H5Fprivate.h | 1 - 9 files changed, 32 insertions(+), 232 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index dee69b4..f04bf64 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -402,6 +402,12 @@ Bug Fixes since HDF5-1.10.6 release Library ------- + - Fix bug and simplify collective metadata write operation when some ranks + have no entries to contribute. This fixes parallel regression test + failures with IBM SpectrumScale MPI on the Summit system at ORNL. + + (QAK - 2020/09/02) + - Avoid setting up complex MPI types with 0-length vectors, which some MPI implementations don't handle well. (In particular, IBM SpectrumScale MPI on the Summit system at ORNL) diff --git a/src/H5Cdbg.c b/src/H5Cdbg.c index 775db4c..a5ff7bc 100644 --- a/src/H5Cdbg.c +++ b/src/H5Cdbg.c @@ -28,16 +28,12 @@ #include "H5Cmodule.h" /* This source code file is part of the H5C module */ -#define H5AC_FRIEND - - - /***********/ /* Headers */ /***********/ #include "H5private.h" /* Generic Functions */ -#include "H5ACpkg.h" /* Metadata Cache */ +#include "H5ACprivate.h" /* Metadata Cache */ #include "H5Cpkg.h" /* Cache */ #include "H5Eprivate.h" /* Error Handling */ @@ -368,107 +364,6 @@ H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn) /*------------------------------------------------------------------------- - * Function: H5C_dump_coll_write_list - * - * Purpose: Debugging routine that prints a summary of the contents of - * the collective write skip list used by the metadata cache - * in the parallel case to maintain a list of entries to write - * collectively at a sync point. - * - * Return: Non-negative on success/Negative on failure - * - * Programmer: John Mainzer - * 4/1/17 - * - *------------------------------------------------------------------------- - */ -#ifdef H5_HAVE_PARALLEL -#ifndef NDEBUG -herr_t -H5C_dump_coll_write_list(H5C_t * cache_ptr, char * calling_fcn) -{ - herr_t ret_value = SUCCEED; /* Return value */ - int i; - int list_len; - H5AC_aux_t * aux_ptr = NULL; - H5C_cache_entry_t * entry_ptr = NULL; - H5SL_node_t * node_ptr = NULL; - - FUNC_ENTER_NOAPI_NOERR - - HDassert(cache_ptr != NULL); - HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); - HDassert(cache_ptr->aux_ptr); - - aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr; - - HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); - - HDassert(calling_fcn != NULL); - - list_len = (int)H5SL_count(cache_ptr->coll_write_list); - - HDfprintf(stdout, "\n\nDumping MDC coll write list from %d:%s.\n", - aux_ptr->mpi_rank, calling_fcn); - HDfprintf(stdout, " slist len = %u.\n", cache_ptr->slist_len); - - if ( list_len > 0 ) { - - /* scan the collective write list generating the desired output */ - HDfprintf(stdout, - "Num: Addr: Len: Prot/Pind: Dirty: Type:\n"); - - i = 0; - - node_ptr = H5SL_first(cache_ptr->coll_write_list); - - if ( node_ptr != NULL ) - - entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); - - else - - entry_ptr = NULL; - - while ( entry_ptr != NULL ) { - - HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); - - HDfprintf(stdout, - "%s%d 0x%016llx %4lld %d/%d %d %s\n", - cache_ptr->prefix, i, - (long long)(entry_ptr->addr), - (long long)(entry_ptr->size), - (int)(entry_ptr->is_protected), - (int)(entry_ptr->is_pinned), - (int)(entry_ptr->is_dirty), - entry_ptr->type->name); - - node_ptr = H5SL_next(node_ptr); - - if ( node_ptr != NULL ) - - entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); - - else - - entry_ptr = NULL; - - i++; - - } /* end while */ - } /* end if */ - - HDfprintf(stdout, "\n\n"); - - FUNC_LEAVE_NOAPI(ret_value) - -} /* H5C_dump_coll_write_list() */ -#endif /* NDEBUG */ -#endif /* H5_HAVE_PARALLEL */ - - -/*------------------------------------------------------------------------- * Function: H5C_set_prefix * * Purpose: Set the values of the prefix field of H5C_t. This diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c index 3579bc2..e0b7f17 100644 --- a/src/H5Cmpio.c +++ b/src/H5Cmpio.c @@ -999,6 +999,7 @@ H5C__collective_write(H5F_t *f) { H5AC_t *cache_ptr; H5FD_mpio_xfer_t orig_xfer_mode = H5FD_MPIO_COLLECTIVE; + void *base_buf; int count; int *length_array = NULL; MPI_Aint *buf_array = NULL; @@ -1008,6 +1009,8 @@ H5C__collective_write(H5F_t *f) MPI_Datatype ftype; hbool_t ftype_created = FALSE; int mpi_code; + char unused = 0; /* Unused, except for non-NULL pointer value */ + size_t buf_count; herr_t ret_value = SUCCEED; FUNC_ENTER_STATIC @@ -1022,20 +1025,17 @@ H5C__collective_write(H5F_t *f) if(H5CX_get_io_xfer_mode(&orig_xfer_mode) < 0) HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + /* Set transfer mode */ + if(H5CX_set_io_xfer_mode(H5FD_MPIO_COLLECTIVE) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") + /* Get number of entries in collective write list */ count = (int)H5SL_count(cache_ptr->coll_write_list); - if(count > 0) { - H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; H5SL_node_t *node; H5C_cache_entry_t *entry_ptr; - void *base_buf; int i; - /* Set new transfer mode */ - if(H5CX_set_io_xfer_mode(xfer_mode) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode") - /* Allocate arrays */ if(NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int))) ) HGOTO_ERROR(H5E_CACHE, H5E_CANTALLOC, FAIL, "memory allocation failed for collective write table length array") @@ -1059,7 +1059,6 @@ H5C__collective_write(H5F_t *f) node = H5SL_next(node); i = 1; while(node) { - if(NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node))) HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") @@ -1076,67 +1075,39 @@ H5C__collective_write(H5F_t *f) /* Create memory MPI type */ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - btype_created = TRUE; - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) /* Create file MPI type */ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - ftype_created = TRUE; - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - /* Pass buf type, file type to the file driver */ - if(H5CX_set_mpi_coll_datatypes(btype, ftype) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") - - /* Write data */ - if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, base_buf) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively") - + /* MPI count to write */ + buf_count = 1; } /* end if */ else { - MPI_Status mpi_stat; - MPI_File *mpi_fh_p; - MPI_File mpi_fh; - MPI_Info *info_p; - MPI_Info info; - -/* This should be rewritten to call H5F_block_write, with the correct - * buffer and file datatypes (null ones). -QAK, 2018/02/21 - */ - if(H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTGET, FAIL, "can't get mpi file handle") - - mpi_fh = *(MPI_File*)mpi_fh_p; - - if(H5F_get_mpi_info(f, &info_p) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file info") + /* Pass trivial buf type, file type to the file driver */ + btype = MPI_BYTE; + ftype = MPI_BYTE; - info = *info_p; + /* Set non-NULL pointer for I/O operation */ + base_buf = &unused; - /* just to match up with the 1st MPI_File_set_view from - * H5FD_mpio_write() - */ - if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + /* MPI count to write */ + buf_count = 0; + } /* end else */ - /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */ - HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); - if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0, MPI_BYTE, &mpi_stat))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) + /* Pass buf type, file type to the file driver */ + if(H5CX_set_mpi_coll_datatypes(btype, ftype) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") - /* just to match up with the 2nd MPI_File_set_view (reset) in - * H5FD_mpio_write() - */ - if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - } /* end else */ + /* Write data */ + if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, buf_count, base_buf) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_WRITEERROR, FAIL, "unable to write entries collectively") done: /* Free arrays */ diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index 3203da6..6411375 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -2338,12 +2338,9 @@ H5_DLL herr_t H5C_dump_cache_LRU(H5C_t *cache_ptr, const char *cache_name); H5_DLL hbool_t H5C_get_serialization_in_progress(const H5C_t *cache_ptr); H5_DLL hbool_t H5C_cache_is_clean(const H5C_t *cache_ptr, H5C_ring_t inner_ring); H5_DLL herr_t H5C_dump_cache_skip_list(H5C_t *cache_ptr, char *calling_fcn); -#ifdef H5_HAVE_PARALLEL -H5_DLL herr_t H5C_dump_coll_write_list(H5C_t * cache_ptr, char * calling_fcn); -#endif /* H5_HAVE_PARALLEL */ H5_DLL herr_t H5C_get_entry_ptr_from_addr(H5C_t *cache_ptr, haddr_t addr, void **entry_ptr_ptr); -H5_DLL herr_t H5C_flush_dependency_exists(H5C_t *cache_ptr, haddr_t parent_addr, +H5_DLL herr_t H5C_flush_dependency_exists(H5C_t *cache_ptr, haddr_t parent_addr, haddr_t child_addr, hbool_t *fd_exists_ptr); H5_DLL herr_t H5C_verify_entry_type(H5C_t *cache_ptr, haddr_t addr, const H5C_class_t *expected_type, hbool_t *in_cache_ptr, diff --git a/src/H5FDmpi.c b/src/H5FDmpi.c index 38096f9..82af11a 100644 --- a/src/H5FDmpi.c +++ b/src/H5FDmpi.c @@ -141,42 +141,6 @@ done: /*------------------------------------------------------------------------- - * Function: H5FD_get_mpi_info - * - * Purpose: Retrieves the file's mpi info - * - * Return: Success: SUCCEED - * - * Failure: FAIL - * - * Programmer: John Mainzer - * 4/4/17 - * - *------------------------------------------------------------------------- - */ -herr_t -H5FD_get_mpi_info(H5FD_t *file, void** mpi_info) -{ - const H5FD_class_mpi_t *cls; - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI_NOINIT - - HDassert(file); - cls = (const H5FD_class_mpi_t *)(file->cls); - HDassert(cls); - HDassert(cls->get_mpi_info); /* All MPI drivers must implement this */ - - /* Dispatch to driver */ - if((ret_value = (cls->get_mpi_info)(file, mpi_info)) < 0) - HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "driver get_mpi_info request failed") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5FD_get_mpi_info() */ - - -/*------------------------------------------------------------------------- * Function: H5FD_mpi_MPIOff_to_haddr * * Purpose: Convert an MPI_Offset value to haddr_t. diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index ec1ca87..c1e1198 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -94,7 +94,6 @@ static herr_t H5FD_mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); static int H5FD_mpio_mpi_rank(const H5FD_t *_file); static int H5FD_mpio_mpi_size(const H5FD_t *_file); static MPI_Comm H5FD_mpio_communicator(const H5FD_t *_file); -static herr_t H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info); /* The MPIO file driver information */ static const H5FD_class_mpi_t H5FD_mpio_g = { @@ -134,8 +133,7 @@ static const H5FD_class_mpi_t H5FD_mpio_g = { }, /* End of superclass information */ H5FD_mpio_mpi_rank, /*get_rank */ H5FD_mpio_mpi_size, /*get_size */ - H5FD_mpio_communicator, /*get_comm */ - H5FD_mpio_get_info /*get_info */ + H5FD_mpio_communicator /*get_comm */ }; #ifdef H5FDmpio_DEBUG diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h index c5bc043..d659a34 100644 --- a/src/H5FDprivate.h +++ b/src/H5FDprivate.h @@ -51,7 +51,6 @@ typedef struct H5FD_class_mpi_t { int (*get_rank)(const H5FD_t *file); /* Get the MPI rank of a process */ int (*get_size)(const H5FD_t *file); /* Get the MPI size of a communicator */ MPI_Comm (*get_comm)(const H5FD_t *file); /* Get the communicator for a file */ - herr_t (*get_mpi_info)(H5FD_t *file, void** mpi_info); /* get MPI_Info for a file */ } H5FD_class_mpi_t; #endif @@ -174,7 +173,6 @@ H5_DLL herr_t H5FD_get_mpio_atomicity(H5FD_t *file, hbool_t *flag); H5_DLL int H5FD_mpi_get_rank(const H5FD_t *file); H5_DLL int H5FD_mpi_get_size(const H5FD_t *file); H5_DLL MPI_Comm H5FD_mpi_get_comm(const H5FD_t *_file); -H5_DLL herr_t H5FD_get_mpi_info(H5FD_t *file, void** file_info); #endif /* H5_HAVE_PARALLEL */ #endif /* !_H5FDprivate_H */ diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index b6cf3a3..fe85c89 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -343,33 +343,5 @@ H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_mpi_retrieve_comm */ - - -/*------------------------------------------------------------------------- - * Function: H5F_get_mpi_info - * - * Purpose: Retrieves MPI File info. - * - * Return: Success: The size (positive) - * Failure: Negative - * - *------------------------------------------------------------------------- - */ -herr_t -H5F_get_mpi_info(const H5F_t *f, MPI_Info **f_info) -{ - herr_t ret_value = SUCCEED; - - FUNC_ENTER_NOAPI(FAIL) - - HDassert(f && f->shared); - - /* Dispatch to driver */ - if ((ret_value = H5FD_get_mpi_info(f->shared->lf, (void **)f_info)) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file info") - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5F_get_mpi_info() */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index 469a37c..cb6e6fe 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -855,7 +855,6 @@ H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); H5_DLL int H5F_mpi_get_size(const H5F_t *f); H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm); -H5_DLL herr_t H5F_get_mpi_info(const H5F_t *f, MPI_Info **f_info); #endif /* H5_HAVE_PARALLEL */ /* External file cache routines */ -- cgit v0.12