From 1619a308cbd6cb45ace7724a8aaadf003a3f80a2 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Thu, 20 Nov 2003 09:36:59 -0500 Subject: [svn-r7860] Purpose: Bug fix Description: Our previous "optimization" of metadata writing which only wrote metadata from one process was abusing MPI-I/O and after some consultation with Rob Ross and Rajeev Thakur, Albert & I have come up with a solution... Solution: Instead of only writing from one process, issue a collective write operation with all processes, for metadata writes. Platforms tested: FreeBSD 4.9 (sleipnir) h5committest --- src/H5F.c | 2 ++ src/H5FDmpio.c | 21 +++++++++++++++++++++ src/H5FDmpiposix.c | 11 +++++++++++ src/H5Fpkg.h | 2 ++ 4 files changed, 36 insertions(+) diff --git a/src/H5F.c b/src/H5F.c index c8d9bf2..574f512 100644 --- a/src/H5F.c +++ b/src/H5F.c @@ -237,6 +237,7 @@ H5F_init_interface(void) FUNC_ENTER_NOINIT(H5F_init_interface) +#ifdef OLD_METADATA_WRITE #ifdef H5_HAVE_PARALLEL { /* Allow MPI buf-and-file-type optimizations? */ @@ -246,6 +247,7 @@ H5F_init_interface(void) } } #endif /* H5_HAVE_PARALLEL */ +#endif /* OLD_METADATA_WRITE */ /* * Initialize the atom group for the file IDs. There are two groups: diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 13d5ee7..2eda0ab 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -154,6 +154,7 @@ static int H5FD_mpio_Debug[256] = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #endif +#ifdef OLD_METADATA_WRITE /* Global var to allow elimination of redundant metadata writes * to be controlled by the value of an environment variable. 
*/ /* Use the elimination by default unless this is the Intel Red machine */ @@ -162,6 +163,7 @@ hbool_t H5_mpi_1_metawrite_g = TRUE; #else hbool_t H5_mpi_1_metawrite_g = FALSE; #endif +#endif /* OLD_METADATA_WRITE */ /* Interface initialization */ #define INTERFACE_INIT H5FD_mpio_init @@ -1854,6 +1856,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) +#ifdef OLD_METADATA_WRITE /* Only p will do the actual write if all procs in comm write same metadata */ if (H5_mpi_1_metawrite_g) { if (file->mpi_rank != H5_PAR_META_WRITE) { @@ -1867,6 +1870,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_DONE(SUCCEED) /* skip the actual write */ } } +#else /* OLD_METADATA_WRITE */ + /* Remember that views are used */ + use_view_this_time=TRUE; + + /* + * Set the file view when we are using MPI derived types + */ + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the write to zero + */ + mpi_off=0; +#endif /* OLD_METADATA_WRITE */ } /* end if */ /* Write the data. 
*/ @@ -1943,6 +1962,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, file->eof = HADDR_UNDEF; done: +#ifdef OLD_METADATA_WRITE /* Guard against getting into metadate broadcast in failure cases */ if(ret_value!=FAIL) { /* if only p writes, need to broadcast the ret_value to other processes */ @@ -1951,6 +1971,7 @@ done: HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ } /* end if */ +#endif /* OLD_METADATA_WRITE */ #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 2625208..647fb35 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -229,6 +229,7 @@ static const H5FD_class_t H5FD_mpiposix_g = { H5FD_FLMAP_SINGLE /*fl_map */ }; +#ifdef OLD_METADATA_WRITE /* Global var to allow elimination of redundant metadata writes * to be controlled by the value of an environment variable. */ /* Use the elimination by default unless this is the Intel Red machine */ @@ -237,6 +238,7 @@ hbool_t H5_mpiposix_1_metawrite_g = TRUE; #else hbool_t H5_mpiposix_1_metawrite_g = FALSE; #endif +#endif /* OLD_METADATA_WRITE */ /* Interface initialization */ #define INTERFACE_INIT H5FD_mpiposix_init @@ -1309,7 +1311,9 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) /* Only p will do the actual write if all procs in comm write same metadata */ +#ifdef OLD_METADATA_WRITE if (H5_mpiposix_1_metawrite_g) +#endif /* OLD_METADATA_WRITE */ if (file->mpi_rank != H5_PAR_META_WRITE) HGOTO_DONE(SUCCEED) /* skip the actual write */ } /* end if */ @@ -1384,10 +1388,17 @@ done: /* Guard against getting into metadata broadcast in failure cases */ else { /* if only p writes, need to broadcast the ret_value to other processes */ +#ifdef OLD_METADATA_WRITE if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) { if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, 
H5_PAR_META_WRITE, file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ +#else /* OLD_METADATA_WRITE */ + if (type!=H5FD_MEM_DRAW) { + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + } /* end if */ +#endif /* OLD_METADATA_WRITE */ } /* end else */ FUNC_LEAVE_NOAPI(ret_value) diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index ea11040..2ef630a 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -186,11 +186,13 @@ struct H5F_t { H5F_mtab_t mtab; /* File mount table */ }; +#ifdef OLD_METADATA_WRITE #ifdef H5_HAVE_PARALLEL /* Whether a single process writes metadata */ H5_DLLVAR hbool_t H5_mpi_1_metawrite_g; H5_DLLVAR hbool_t H5_mpiposix_1_metawrite_g; #endif /* H5_HAVE_PARALLEL */ +#endif /* OLD_METADATA_WRITE */ /* Private functions, not part of the publicly documented API */ #ifdef NOT_YET -- cgit v0.12