From 1b27db1755be9cef391ee93a0e3c03360cd0c8ff Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Thu, 20 Nov 2003 09:37:11 -0500 Subject: [svn-r7861] Purpose: Bug fix Description: Our previous "optimization" of metadata writing which only wrote metadata from one process was abusing MPI-I/O and after some consultation with Rob Ross and Rajeev Thakur, Albert & I have come up with a solution... Solution: Instead of only writing from one process, issue a collective write operation with all processes, for metadata writes. Platforms tested: FreeBSD 4.9 (sleipnir) h5committest --- src/H5F.c | 2 ++ src/H5FDmpio.c | 21 +++++++++++++++++++++ src/H5FDmpiposix.c | 12 ++++++++++++ src/H5Fpkg.h | 2 ++ 4 files changed, 37 insertions(+) diff --git a/src/H5F.c b/src/H5F.c index 63d6408..586e416 100644 --- a/src/H5F.c +++ b/src/H5F.c @@ -232,6 +232,7 @@ H5F_init_interface(void) FUNC_ENTER_NOINIT(H5F_init_interface); +#ifdef OLD_METADATA_WRITE #ifdef H5_HAVE_PARALLEL { /* Allow MPI buf-and-file-type optimizations? */ @@ -241,6 +242,7 @@ H5F_init_interface(void) } } #endif /* H5_HAVE_PARALLEL */ +#endif /* OLD_METADATA_WRITE */ /* * Initialize the atom group for the file IDs. There are two groups: diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index dc98bc2..492cba3 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -149,6 +149,7 @@ static int H5FD_mpio_Debug[256] = 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; #endif +#ifdef OLD_METADATA_WRITE /* Global var to allow elimination of redundant metadata writes * to be controlled by the value of an environment variable. 
*/ /* Use the elimination by default unless this is the Intel Red machine */ @@ -157,6 +158,7 @@ hbool_t H5_mpi_1_metawrite_g = TRUE; #else hbool_t H5_mpi_1_metawrite_g = FALSE; #endif +#endif /* OLD_METADATA_WRITE */ /* Interface initialization */ #define PABLO_MASK H5FD_mpio_mask @@ -1852,6 +1854,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); +#ifdef OLD_METADATA_WRITE /* Only p will do the actual write if all procs in comm write same metadata */ if (H5_mpi_1_metawrite_g) { if (file->mpi_rank != H5_PAR_META_WRITE) { @@ -1865,6 +1868,22 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_DONE(SUCCEED) /* skip the actual write */ } } +#else /* OLD_METADATA_WRITE */ + /* Remember that views are used */ + use_view_this_time=TRUE; + + /* + * Set the file view when we are using MPI derived types + */ + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* When using types, use the address as the displacement for + * MPI_File_set_view and reset the address for the write to zero + */ + mpi_off=0; +#endif /* OLD_METADATA_WRITE */ } /* end if */ /* Write the data. 
*/ @@ -1941,6 +1960,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, file->eof = HADDR_UNDEF; done: +#ifdef OLD_METADATA_WRITE /* Guard against getting into metadate broadcast in failure cases */ if(ret_value!=FAIL) { /* if only p writes, need to broadcast the ret_value to other processes */ @@ -1949,6 +1969,7 @@ done: HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); } /* end if */ } /* end if */ +#endif /* OLD_METADATA_WRITE */ #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 4920ade..8bca0bf 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -224,6 +224,7 @@ static const H5FD_class_t H5FD_mpiposix_g = { H5FD_FLMAP_SINGLE /*fl_map */ }; +#ifdef OLD_METADATA_WRITE /* Global var to allow elimination of redundant metadata writes * to be controlled by the value of an environment variable. */ /* Use the elimination by default unless this is the Intel Red machine */ @@ -232,6 +233,7 @@ hbool_t H5_mpiposix_1_metawrite_g = TRUE; #else hbool_t H5_mpiposix_1_metawrite_g = FALSE; #endif +#endif /* OLD_METADATA_WRITE */ /* Interface initialization */ #define PABLO_MASK H5FD_mpiposix_mask @@ -1424,7 +1426,9 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); /* Only p will do the actual write if all procs in comm write same metadata */ +#ifdef OLD_METADATA_WRITE if (H5_mpiposix_1_metawrite_g) +#endif /* OLD_METADATA_WRITE */ if (file->mpi_rank != H5_PAR_META_WRITE) HGOTO_DONE(SUCCEED) /* skip the actual write */ } /* end if */ @@ -1498,11 +1502,19 @@ done: } /* end if */ /* Guard against getting into metadata broadcast in failure cases */ else { +#ifdef OLD_METADATA_WRITE /* if only p writes, need to broadcast the ret_value to other processes */ if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) { if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, 
H5_PAR_META_WRITE, file->comm))) HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); } /* end if */ +#else /* OLD_METADATA_WRITE */ + /* if only p writes, need to broadcast the ret_value to other processes */ + if (type!=H5FD_MEM_DRAW) { + if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code); + } /* end if */ +#endif /* OLD_METADATA_WRITE */ } /* end else */ FUNC_LEAVE_NOAPI(ret_value); diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index 631d482..257e990 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -185,11 +185,13 @@ struct H5F_t { H5F_mtab_t mtab; /* File mount table */ }; +#ifdef OLD_METADATA_WRITE #ifdef H5_HAVE_PARALLEL /* Whether a single process writes metadata */ H5_DLLVAR hbool_t H5_mpi_1_metawrite_g; H5_DLLVAR hbool_t H5_mpiposix_1_metawrite_g; #endif /* H5_HAVE_PARALLEL */ +#endif /* OLD_METADATA_WRITE */ /* Private functions, not part of the publicly documented API */ #ifdef NOT_YET -- cgit v0.12