diff options
-rw-r--r-- | src/H5Cdbg.c | 112 | ||||
-rw-r--r-- | src/H5Cmpio.c | 102 | ||||
-rw-r--r-- | src/H5Cprivate.h | 5 | ||||
-rw-r--r-- | src/H5FDmpi.c | 39 | ||||
-rw-r--r-- | src/H5FDmpio.c | 37 | ||||
-rw-r--r-- | src/H5FDprivate.h | 2 | ||||
-rw-r--r-- | src/H5Fmpi.c | 26 | ||||
-rw-r--r-- | src/H5Fprivate.h | 1 |
8 files changed, 298 insertions, 26 deletions
diff --git a/src/H5Cdbg.c b/src/H5Cdbg.c index 0a98406..455b653 100644 --- a/src/H5Cdbg.c +++ b/src/H5Cdbg.c @@ -30,12 +30,16 @@ #include "H5Cmodule.h" /* This source code file is part of the H5C module */ +#define H5AC_FRIEND + + + /***********/ /* Headers */ /***********/ #include "H5private.h" /* Generic Functions */ -#include "H5ACprivate.h" /* Metadata Cache */ +#include "H5ACpkg.h" /* Metadata Cache */ #include "H5Cpkg.h" /* Cache */ #include "H5Eprivate.h" /* Error Handling */ @@ -340,6 +344,112 @@ H5C_dump_cache_skip_list(H5C_t * cache_ptr, char * calling_fcn) /*------------------------------------------------------------------------- + * Function: H5C_dump_coll_write_list + * + * Purpose: Debugging routine that prints a summary of the contents of + * the collective write skip list used by the metadata cache + * in the parallel case to maintain a list of entries to write + * collectively at a sync point. + * calling_fcn is printed in the banner after the MPI rank and must not be NULL. + * Return: Non-negative on success/Negative on failure (as written, ret_value is never changed from SUCCEED; bad arguments are caught only by assertions) + * + * Programmer: John Mainzer + * 4/1/17 + * + *------------------------------------------------------------------------- + */ +#ifdef H5_HAVE_PARALLEL +#ifndef NDEBUG +herr_t +H5C_dump_coll_write_list(H5C_t * cache_ptr, char * calling_fcn) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int i; + int list_len; + H5AC_aux_t * aux_ptr = NULL; + H5C_cache_entry_t * entry_ptr = NULL; + H5SL_node_t * node_ptr = NULL; + + FUNC_ENTER_NOAPI_NOERR + + HDassert(cache_ptr != NULL); + HDassert(cache_ptr->magic == H5C__H5C_T_MAGIC); + HDassert(cache_ptr->aux_ptr); + + aux_ptr = (H5AC_aux_t *)cache_ptr->aux_ptr; + + HDassert(aux_ptr->magic == H5AC__H5AC_AUX_T_MAGIC); + + HDassert(calling_fcn != NULL); + + list_len = (int)H5SL_count(cache_ptr->coll_write_list); + + HDfprintf(stdout, "\n\nDumping MDC coll write list from %d:%s.\n", + aux_ptr->mpi_rank, calling_fcn); + /* NOTE(review): this prints cache_ptr->slist_len, not list_len (the coll_write_list count computed above) -- confirm that is intended */ HDfprintf(stdout, " slist len = %u.\n", cache_ptr->slist_len); + + if ( list_len > 0 ) { + + /* scan the collective write list generating 
the desired output: one row per entry with prefix, running index, address, size, protected/pinned flags, dirty flag and type name */ + HDfprintf(stdout, + "Num: Addr: Len: Prot/Pind: Dirty: Type:\n"); + + i = 0; + + node_ptr = H5SL_first(cache_ptr->coll_write_list); + + if ( node_ptr != NULL ) + + entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + + else + + entry_ptr = NULL; + + while ( entry_ptr != NULL ) { + + HDassert(entry_ptr->magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); + + HDfprintf(stdout, + "%s%d 0x%016llx %4lld %d/%d %d %s\n", + cache_ptr->prefix, i, + (long long)(entry_ptr->addr), + (long long)(entry_ptr->size), + (int)(entry_ptr->is_protected), + (int)(entry_ptr->is_pinned), + (int)(entry_ptr->is_dirty), + entry_ptr->type->name); + + /* HDfprintf(stdout, " node_ptr = 0x%llx, item = %p\n", + (unsigned long long)node_ptr, + H5SL_item(node_ptr)); + */ + + node_ptr = H5SL_next(node_ptr); + + if ( node_ptr != NULL ) + + entry_ptr = (H5C_cache_entry_t *)H5SL_item(node_ptr); + + else + + entry_ptr = NULL; + + i++; + + } /* end while */ + } /* end if */ + + HDfprintf(stdout, "\n\n"); + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5C_dump_coll_write_list() */ +#endif /* NDEBUG */ +#endif /* H5_HAVE_PARALLEL */ + + +/*------------------------------------------------------------------------- * Function: H5C_set_prefix * * Purpose: Set the values of the prefix field of H5C_t. 
This diff --git a/src/H5Cmpio.c b/src/H5Cmpio.c index 06ce714..0d1a3ff 100644 --- a/src/H5Cmpio.c +++ b/src/H5Cmpio.c @@ -950,12 +950,15 @@ H5C__collective_write(H5F_t *f, hid_t dxpl_id) /* Get original transfer mode */ if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id))) - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \ + "not a data transfer property list") + /* NOTE(review): a failed H5P_get is reported below with H5E_CANTSET and a set-property message, same as the H5P_set calls -- confirm whether a get-specific error was intended */ if(H5P_get(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") /* Get number of entries in collective write list */ count = (int)H5SL_count(cache_ptr->coll_write_list); + if(count > 0) { H5FD_mpio_xfer_t xfer_mode = H5FD_MPIO_COLLECTIVE; H5SL_node_t *node; @@ -964,21 +967,34 @@ H5C__collective_write(H5F_t *f, hid_t dxpl_id) int i; if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &xfer_mode) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, \ + "can't set MPI-I/O property") /* Allocate arrays */ - if(NULL == (length_array = (int *)H5MM_malloc((size_t)count * sizeof(int)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for collective write table length array") - if(NULL == (buf_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for collective buf table length array") - if(NULL == (offset_array = (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for collective offset table length array") + if ( NULL == (length_array = + (int *)H5MM_malloc((size_t)count * sizeof(int))) ) + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for collective write table length array") + + if ( NULL == (buf_array = + (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint))) ) + 
+ HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for collective buf table length array") + + if(NULL == (offset_array = + (MPI_Aint *)H5MM_malloc((size_t)count * sizeof(MPI_Aint))) ) + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, \ + "memory allocation failed for collective offset table length array") /* Fill arrays */ node = H5SL_first(cache_ptr->coll_write_list); HDassert(node); if(NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node))) - HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") + HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, \ + "can't retrieve skip list item") /* Set up initial array position & buffer base address */ length_array[0] = (int)entry_ptr->size; @@ -989,8 +1005,10 @@ H5C__collective_write(H5F_t *f, hid_t dxpl_id) node = H5SL_next(node); i = 1; while(node) { + if(NULL == (entry_ptr = (H5C_cache_entry_t *)H5SL_item(node))) - HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, "can't retrieve skip list item") + HGOTO_ERROR(H5E_CACHE, H5E_NOTFOUND, FAIL, \ + "can't retrieve skip list item") /* Set up array position */ length_array[i] = (int)entry_ptr->size; @@ -1003,48 +1021,85 @@ H5C__collective_write(H5F_t *f, hid_t dxpl_id) } /* end while */ /* Create memory MPI type */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, buf_array, MPI_BYTE, &btype))) + if(MPI_SUCCESS != (mpi_code = + MPI_Type_create_hindexed(count, length_array, + buf_array, MPI_BYTE, + &btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + btype_created = TRUE; + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&btype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) /* Create file MPI type */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(count, length_array, offset_array, MPI_BYTE, &ftype))) + if(MPI_SUCCESS != (mpi_code = + MPI_Type_create_hindexed(count, length_array, + offset_array, MPI_BYTE, + &ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed 
failed", mpi_code) + ftype_created = TRUE; + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&ftype))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) /* Pass buf type, file type to the file driver */ if(H5FD_mpi_setup_collective(dxpl_id, &btype, &ftype) < 0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties") + HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, \ + "can't set MPI-I/O properties") /* Write data */ - if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, (size_t)1, dxpl_id, base_buf) < 0) - HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "unable to write entries collectively") + if(H5F_block_write(f, H5FD_MEM_DEFAULT, (haddr_t)0, + (size_t)1, dxpl_id, base_buf) < 0) + HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, \ + "unable to write entries collectively") + } /* end if */ else { /* count == 0: nothing on the collective write list here, so issue zero-length calls that match up with the MPI calls made in H5FD_mpio_write() */ MPI_Status mpi_stat; - MPI_File mpi_fh_p; + MPI_File *mpi_fh_p; MPI_File mpi_fh; + MPI_Info *info_p; + MPI_Info info; /* NOTE(review): with mpi_fh_p declared as MPI_File *, the (MPI_File **) cast here and the (MPI_File*) cast on the dereference below look redundant -- confirm */ if(H5F_get_mpi_handle(f, (MPI_File **)&mpi_fh_p) < 0) - HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file handle") + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, \ + "can't get mpi file handle") + mpi_fh = *(MPI_File*)mpi_fh_p; - /* just to match up with the 1st MPI_File_set_view from H5FD_mpio_write() */ - if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL))) + /* Fetch the file's MPI_Info so the MPI_File_set_view calls below pass it instead of MPI_INFO_NULL */ if (H5F_get_mpi_info(f, &info_p) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, \ + "can't get mpi file info") + + info = *info_p; + + /* just to match up with the 1st MPI_File_set_view from + * H5FD_mpio_write() + */ + if(MPI_SUCCESS != (mpi_code = + MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, + MPI_BYTE, "native", + info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) /* just to match up with MPI_File_write_at_all from H5FD_mpio_write() */ HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); - if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, NULL, 0, MPI_BYTE, &mpi_stat))) + 
if(MPI_SUCCESS != (mpi_code = + MPI_File_write_at_all(mpi_fh, (MPI_Offset)0, + NULL, 0, MPI_BYTE, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) - /* just to match up with the 2nd MPI_File_set_view (reset) in H5FD_mpio_write() */ - if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, "native", MPI_INFO_NULL))) + /* just to match up with the 2nd MPI_File_set_view (reset) in + * H5FD_mpio_write() + */ + if(MPI_SUCCESS != (mpi_code = + MPI_File_set_view(mpi_fh, (MPI_Offset)0, MPI_BYTE, + MPI_BYTE, "native", + info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + } /* end else */ done: @@ -1063,7 +1118,8 @@ done: if(orig_xfer_mode != H5FD_MPIO_COLLECTIVE) { HDassert(plist); if(H5P_set(plist, H5D_XFER_IO_XFER_MODE_NAME, &orig_xfer_mode) < 0) - HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O property") + HDONE_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, \ + "can't set MPI-I/O property") } /* end if */ FUNC_LEAVE_NOAPI(ret_value); diff --git a/src/H5Cprivate.h b/src/H5Cprivate.h index bdfb23e..5c5a666 100644 --- a/src/H5Cprivate.h +++ b/src/H5Cprivate.h @@ -2340,9 +2340,12 @@ H5_DLL herr_t H5C_dump_cache_LRU(H5C_t *cache_ptr, const char *cache_name); H5_DLL hbool_t H5C_get_serialization_in_progress(const H5C_t *cache_ptr); H5_DLL hbool_t H5C_cache_is_clean(const H5C_t *cache_ptr, H5C_ring_t inner_ring); H5_DLL herr_t H5C_dump_cache_skip_list(H5C_t *cache_ptr, char *calling_fcn); +#ifdef H5_HAVE_PARALLEL +H5_DLL herr_t H5C_dump_coll_write_list(H5C_t * cache_ptr, char * calling_fcn); +#endif /* H5_HAVE_PARALLEL */ H5_DLL herr_t H5C_get_entry_ptr_from_addr(H5C_t *cache_ptr, haddr_t addr, void **entry_ptr_ptr); -H5_DLL herr_t H5C_flush_dependency_exists(H5C_t *cache_ptr, haddr_t parent_addr, +H5_DLL herr_t H5C_flush_dependency_exists(H5C_t *cache_ptr, haddr_t parent_addr, haddr_t child_addr, hbool_t *fd_exists_ptr); H5_DLL herr_t H5C_verify_entry_type(H5C_t *cache_ptr, haddr_t addr, const 
H5C_class_t *expected_type, hbool_t *in_cache_ptr, diff --git a/src/H5FDmpi.c b/src/H5FDmpi.c index fdc4eca..bf4e03a 100644 --- a/src/H5FDmpi.c +++ b/src/H5FDmpi.c @@ -148,6 +148,45 @@ done: /*------------------------------------------------------------------------- + * Function: H5FD_get_mpi_info + * + * Purpose: Retrieves the file's mpi info + * + * Return: Success: SUCCEED + * + * Failure: Negative + * + * Programmer: John Mainzer + * 4/4/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_get_mpi_info(H5FD_t *file, void** mpi_info) +{ + const H5FD_class_mpi_t *cls; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(file); + cls = (const H5FD_class_mpi_t *)(file->cls); + HDassert(cls); + HDassert(cls->get_mpi_info); /* All MPI drivers must implement this */ + + /* Dispatch to driver */ + if ((ret_value=(cls->get_mpi_info)(file, mpi_info)) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, MPI_COMM_NULL, \ + "driver get_mpi_info request failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_get_mpi_info() */ + + +/*------------------------------------------------------------------------- * Function: H5FD_mpi_MPIOff_to_haddr * * Purpose: Convert an MPI_Offset value to haddr_t. 
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index a3a404f..9417d46 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -95,6 +95,7 @@ static herr_t H5FD_mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); static int H5FD_mpio_mpi_rank(const H5FD_t *_file); static int H5FD_mpio_mpi_size(const H5FD_t *_file); static MPI_Comm H5FD_mpio_communicator(const H5FD_t *_file); +static herr_t H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info); /* The MPIO file driver information */ static const H5FD_class_mpi_t H5FD_mpio_g = { @@ -134,7 +135,8 @@ static const H5FD_class_mpi_t H5FD_mpio_g = { }, /* End of superclass information */ H5FD_mpio_mpi_rank, /*get_rank */ H5FD_mpio_mpi_size, /*get_size */ - H5FD_mpio_communicator /*get_comm */ + H5FD_mpio_communicator, /*get_comm */ + H5FD_mpio_get_info /*get_mpi_info */ }; #ifdef H5FDmpio_DEBUG @@ -1308,6 +1310,39 @@ done: /*------------------------------------------------------------------------- + * Function: H5FD_mpio_get_info + * + * Purpose: Stores the address of the MPIO driver file struct's info member (file->info) in *mpi_info; this is a pointer into the file struct, not a copy. + * + * Returns: Non-negative on success; negative if mpi_info is NULL. 
+ * + * Programmer: John Mainzer + * April 4, 2017 + * + * Modifications: + * + *------------------------------------------------------------------------- +*/ +static herr_t +H5FD_mpio_get_info(H5FD_t *_file, void** mpi_info) +{ + H5FD_mpio_t *file = (H5FD_mpio_t *)_file; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + if(!mpi_info) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "mpi info not valid") + + *mpi_info = &(file->info); + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_mpio_get_info() */ + + +/*------------------------------------------------------------------------- * Function: H5FD_mpio_read * * Purpose: Reads SIZE bytes of data from FILE beginning at address ADDR diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h index c64ec30..fb7c43c 100644 --- a/src/H5FDprivate.h +++ b/src/H5FDprivate.h @@ -53,6 +53,7 @@ typedef struct H5FD_class_mpi_t { int (*get_rank)(const H5FD_t *file); /* Get the MPI rank of a process */ int (*get_size)(const H5FD_t *file); /* Get the MPI size of a communicator */ MPI_Comm (*get_comm)(const H5FD_t *file); /* Get the communicator for a file */ + herr_t (*get_mpi_info)(H5FD_t *file, void** mpi_info); /* get MPI_Info for a file */ } H5FD_class_mpi_t; #endif @@ -202,6 +203,7 @@ H5_DLL herr_t H5FD_get_mpio_atomicity(H5FD_t *file, hbool_t *flag); H5_DLL int H5FD_mpi_get_rank(const H5FD_t *file); H5_DLL int H5FD_mpi_get_size(const H5FD_t *file); H5_DLL MPI_Comm H5FD_mpi_get_comm(const H5FD_t *_file); +H5_DLL herr_t H5FD_get_mpi_info(H5FD_t *file, void** file_info); #endif /* H5_HAVE_PARALLEL */ #endif /* !_H5FDprivate_H */ diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c index 5434aa5..60593a8 100644 --- a/src/H5Fmpi.c +++ b/src/H5Fmpi.c @@ -356,5 +356,31 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_mpi_retrieve_comm */ +/*------------------------------------------------------------------------- + * Function: H5F_get_mpi_info + * + * Purpose: Retrieves the file's MPI info by calling H5FD_get_mpi_info() on f->shared->lf; the result is stored through f_info. 
+ * + * Return: Success: Non-negative + * Failure: Negative + * + *------------------------------------------------------------------------- + */ +herr_t +H5F_get_mpi_info(const H5F_t *f, MPI_Info **f_info) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + HDassert(f && f->shared); + + /* Dispatch to driver */ + if ((ret_value = H5FD_get_mpi_info(f->shared->lf, (void **)f_info)) < 0) + HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get mpi file info") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_get_mpi_info() */ #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index 886063a..8ef353a 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -852,6 +852,7 @@ H5_DLL int H5F_mpi_get_rank(const H5F_t *f); H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f); H5_DLL int H5F_mpi_get_size(const H5F_t *f); H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm); +H5_DLL herr_t H5F_get_mpi_info(const H5F_t *f, MPI_Info **f_info); #endif /* H5_HAVE_PARALLEL */ /* External file cache routines */ |