diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2002-05-10 15:37:48 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2002-05-10 15:37:48 (GMT) |
commit | fcdc05f30727e614505a149e572645456d9d4ca8 (patch) | |
tree | 1f6512d0165d7920c97982a459fac7ed1bd87e15 | |
parent | c43feb3092be905574b928b9a10850db5f896a3d (diff) | |
download | hdf5-fcdc05f30727e614505a149e572645456d9d4ca8.zip hdf5-fcdc05f30727e614505a149e572645456d9d4ca8.tar.gz hdf5-fcdc05f30727e614505a149e572645456d9d4ca8.tar.bz2 |
[svn-r5385] Purpose:
New Feature
Description:
Currently, only process 0 writes metadata to disk, leading to a potential
performance bottleneck as the other processes wait for it to catch up.
Solution:
Rotate the metadata responsibilities among all processes, spreading out the
work.
Platforms tested:
IRIX64 6.5 (modi4)
-rw-r--r-- | src/H5FDmpio.c | 25 |
1 files changed, 21 insertions, 4 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 801a4d6..0f1be8d 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -54,6 +54,7 @@ typedef struct H5FD_mpio_t { MPI_Info info; /*file information */ int mpi_rank; /* This process's rank */ int mpi_size; /* Total number of processes */ + int mpi_round; /* Current round robin process (for metadata I/O) */ hbool_t allsame; /*same data for all procs? */ haddr_t eof; /*end-of-file marker */ haddr_t eoa; /*end-of-address marker */ @@ -828,6 +829,7 @@ H5FD_mpio_open(const char *name, unsigned flags, hid_t fapl_id, file->info = fa->info; file->mpi_rank = mpi_rank; file->mpi_size = mpi_size; + file->mpi_round = 0; /* Start metadata writes with process 0 */ file->btype = MPI_DATATYPE_NULL; file->ftype = MPI_DATATYPE_NULL; @@ -1433,10 +1435,10 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t ad file->old_use_types = use_types_this_time; file->use_types = 0; - /* Only p0 will do the actual write if all procs in comm write same data */ + /* Only p<round> will do the actual write if all procs in comm write same data */ allsame = H5FD_mpio_tas_allsame(_file, FALSE); if (allsame && H5_mpi_1_metawrite_g) { - if (file->mpi_rank != 0) { + if (file->mpi_rank != file->mpi_round) { #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'w']) { fprintf(stdout, @@ -1501,11 +1503,26 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t ad file->eof = HADDR_UNDEF; done: - /* if only p0 writes, need to boardcast the ret_value to other processes */ + /* if only p<round> writes, need to broadcast the ret_value to other processes */ if (allsame && H5_mpi_1_metawrite_g) { if (MPI_SUCCESS != - MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, 0, file->comm)) + MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, file->mpi_round, file->comm)) HRETURN_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Bcast failed"); + + /* Round-robin rotate to the next process */ + file->mpi_round = 
(++file->mpi_round)%file->mpi_size; +#ifdef QAK +{ + int max,min; + + MPI_Allreduce(&file->mpi_round, &max, 1, MPI_INT, MPI_MAX, file->comm); + MPI_Allreduce(&file->mpi_round, &min, 1, MPI_INT, MPI_MIN, file->comm); + if(max!=file->mpi_round) + printf("%s: rank=%d, round=%d, max=%d\n",FUNC,file->mpi_rank,file->mpi_round,max); + if(min!=file->mpi_round) + printf("%s: rank=%d, round=%d, min=%d\n",FUNC,file->mpi_rank,file->mpi_round,min); +} +#endif /* QAK */ } #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) |