From 8a43391a1157aa6bccb530cb1a39e6ad5e0e01e2 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Mon, 27 Oct 2003 16:38:27 -0500 Subject: [svn-r7754] Purpose: Code cleanup Description: Straighten out more goofiness in the MPI code dealing with collective I/O transfers - mostly make certain that a view is set if-and-only-if collective I/O is occurring on raw data (and vice versa for views and independent I/O) Platforms tested: FreeBSD 4.9 (sleipnir) w/parallel & FPHDF5 too minor to require h5committest --- src/H5Dio.c | 14 +++++++++---- src/H5FDfphdf5.c | 48 ++++++++++++++++++++++--------------------- src/H5FDmpio.c | 62 +++++++++++++++++++++++++++++++------------------------- src/H5S.c | 20 +++++++++++++++++- src/H5Sprivate.h | 2 +- 5 files changed, 89 insertions(+), 57 deletions(-) diff --git a/src/H5Dio.c b/src/H5Dio.c index 0fcc7ea..12d4dec 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -515,6 +515,7 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, H5S_conv_t *sconv=NULL; /*space conversion funcs*/ #ifdef H5_HAVE_PARALLEL H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */ + hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ #endif /*H5_HAVE_PARALLEL*/ H5P_genplist_t *dx_plist=NULL; /* Data transfer property list */ @@ -637,11 +638,13 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, } /* end switch */ /* Get dataspace functions */ - if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags))) + if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags, &use_par_opt_io))) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert from file to memory data space") #ifdef H5_HAVE_PARALLEL - H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed); + /* Don't reset the transfer mode if we can't or won't use it */ + if(!use_par_opt_io || !H5T_path_noop(tpath)) + 
H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed); #endif /*H5_HAVE_PARALLEL*/ /* Determine correct I/O routine to invoke */ @@ -737,6 +740,7 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, H5S_conv_t *sconv=NULL; /*space conversion funcs*/ #ifdef H5_HAVE_PARALLEL H5FD_mpio_xfer_t xfer_mode; /*xfer_mode for this request */ + hbool_t use_par_opt_io=FALSE; /* Whether the 'optimized' I/O routines with be parallel */ hbool_t xfer_mode_changed=FALSE; /* Whether the transfer mode was changed */ #endif /*H5_HAVE_PARALLEL*/ H5P_genplist_t *dx_plist=NULL; /* Data transfer property list */ @@ -857,11 +861,13 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space, } /* end switch */ /* Get dataspace functions */ - if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags))) + if (NULL==(sconv=H5S_find(mem_space, file_space, sconv_flags, &use_par_opt_io))) HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert from memory to file data space") #ifdef H5_HAVE_PARALLEL - H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed); + /* Don't reset the transfer mode if we can't or won't use it */ + if(!use_par_opt_io || !H5T_path_noop(tpath)) + H5D_io_assist_mpio(dx_plist, xfer_mode, &xfer_mode_changed); #endif /*H5_HAVE_PARALLEL*/ /* Determine correct I/O routine to invoke */ diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c index 59dbecf..577f51d 100644 --- a/src/H5FDfphdf5.c +++ b/src/H5FDfphdf5.c @@ -1161,7 +1161,6 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, { H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; MPI_Offset mpi_off; - MPI_Offset mpi_disp; MPI_Status status; int mrc; MPI_Datatype buf_type; @@ -1216,6 +1215,9 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") if (use_view_this_time) { + /* Sanity check that views will only be used by collective I/O */ + 
assert(xfer_mode==H5FD_MPIO_COLLECTIVE); + /* Prepare for a full-blown xfer using btype, ftype, and disp */ if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") @@ -1223,26 +1225,27 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") + /* Set the file view when we are using MPI derived types */ + if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, + file_type, H5FD_mpio_native, + file->info)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) + /* * When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ - mpi_disp = mpi_off; mpi_off = 0; - - /* Set the file view when we are using MPI derived types */ - if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)mpi_disp, MPI_BYTE, - file_type, H5FD_mpio_native, - file->info)) != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) } else { + /* Sanity check that independent I/O must be occuring */ + assert(xfer_mode==H5FD_MPIO_INDEPENDENT); + /* * Prepare for a simple xfer of a contiguous block of bytes. The * btype, ftype, and disp fields are not used. */ buf_type = MPI_BYTE; file_type = MPI_BYTE; - mpi_disp = 0; /* mpi_off is already set */ } /* If metadata, check the metadata cache first */ @@ -1271,9 +1274,7 @@ HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__); } /* Read the data. 
*/ - assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE); - - if (xfer_mode == H5FD_MPIO_INDEPENDENT) { + if (!use_view_this_time) { if ((mrc = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc) @@ -1481,7 +1482,8 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size, if (use_view_this_time) { - MPI_Offset mpi_disp; + /* Sanity check that views will only be used by collective I/O */ + assert(xfer_mode==H5FD_MPIO_COLLECTIVE); /* Prepare for a full-blown xfer using btype, ftype, and disp */ if (H5P_get(plist, H5FD_FPHDF5_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) @@ -1490,19 +1492,21 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size, if (H5P_get(plist, H5FD_FPHDF5_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") + /* Set the file view when we are using MPI derived types */ + if ((mrc = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, + file_type, H5FD_mpio_native, + file->info)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) + /* * When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ - mpi_disp = mpi_off; mpi_off = 0; - - /* Set the file view when we are using MPI derived types */ - if ((mrc = MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, - file_type, H5FD_mpio_native, - file->info)) != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) } else { + /* Sanity check that independent I/O must be occuring */ + assert(xfer_mode==H5FD_MPIO_INDEPENDENT); + /* * Prepare for a simple xfer of a contiguous block of bytes. The * btype and ftype. @@ -1517,9 +1521,7 @@ H5FD_fphdf5_write_real(H5FD_t *_file, hid_t dxpl_id, haddr_t addr, int size, xfer_mode = H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_MODE_NAME); /* Write the data. 
*/ - assert(xfer_mode == H5FD_MPIO_INDEPENDENT || xfer_mode == H5FD_MPIO_COLLECTIVE); - - if (xfer_mode == H5FD_MPIO_INDEPENDENT) { + if (!use_view_this_time) { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf, size, buf_type, &status)) != MPI_SUCCESS) diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 5bd714b..4fefbde 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -1470,7 +1470,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add void *buf/*out*/) { H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - MPI_Offset mpi_off, mpi_disp; + MPI_Offset mpi_off; MPI_Status mpi_stat; int mpi_code; /* mpi return code */ MPI_Datatype buf_type, file_type; @@ -1528,40 +1528,41 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") if (use_view_this_time) { + /* Sanity check that views will only be used by collective I/O */ + assert(xfer_mode==H5FD_MPIO_COLLECTIVE); + /* prepare for a full-blown xfer using btype, ftype, and disp */ if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") + /* + * Set the file view when we are using MPI derived types + */ + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + /* When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ - mpi_disp=mpi_off; mpi_off=0; } /* end if */ else { + /* Sanity check that independent I/O must be occuring */ + assert(xfer_mode==H5FD_MPIO_INDEPENDENT); + /* * Prepare for a 
simple xfer of a contiguous block of bytes. The * btype, ftype, and disp fields are not used. */ buf_type = MPI_BYTE; file_type = MPI_BYTE; - mpi_disp = 0; /* mpi_off is alread set */ } /* end else */ - /* - * Set the file view when we are using MPI derived types - */ - if (use_view_this_time) { - /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - } /* end if */ - /* Read the data. */ - assert(H5FD_MPIO_INDEPENDENT==xfer_mode || H5FD_MPIO_COLLECTIVE==xfer_mode); - if (H5FD_MPIO_INDEPENDENT==xfer_mode) { + if (!use_view_this_time) { if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) } else { @@ -1776,7 +1777,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf) { H5FD_mpio_t *file = (H5FD_mpio_t*)_file; - MPI_Offset mpi_off, mpi_disp; + MPI_Offset mpi_off; MPI_Status mpi_stat; MPI_Datatype buf_type, file_type; int mpi_code; /* MPI return code */ @@ -1835,37 +1836,39 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") if (use_view_this_time) { + /* Sanity check that views will only be used by collective I/O */ + assert(xfer_mode==H5FD_MPIO_COLLECTIVE); + /* prepare for a full-blown xfer using btype, ftype, and disp */ if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") if(H5P_get(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") + /* + * Set the file view when we are using MPI derived types + */ + /*OKAY: CAST DISCARDS CONST QUALIFIER*/ + if (MPI_SUCCESS != 
(mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, (char*)"native", file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + /* When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ - mpi_disp=mpi_off; mpi_off=0; } /* end if */ else { + /* Sanity check that independent I/O must occur */ + assert(xfer_mode==H5FD_MPIO_INDEPENDENT); + /* * Prepare for a simple xfer of a contiguous block of bytes. * The btype, ftype, and disp fields are not used. */ buf_type = MPI_BYTE; file_type = MPI_BYTE; - mpi_disp = 0; /* mpi_off is already set */ } /* end else */ - /* - * Set the file view when we are using MPI derived types - */ - if (use_view_this_time) { - /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) - } /* end if */ - /* Metadata specific actions */ if(type!=H5FD_MEM_DRAW) { /* Check if we need to syncronize all processes before attempting metadata write @@ -1873,6 +1876,10 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, * and writes the metadata to the file before all the processes have * read the data, "transmitting" data from the "future" to the reading * process. -QAK ) + * + * The only time we don't want to block before a metdata write is when + * we are flushing out a bunch of metadata. Then, we block before the + * first write and don't block for further writes in the sequence. */ if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0) if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) @@ -1898,8 +1905,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, } /* end if */ /* Write the data. 
*/ - assert(H5FD_MPIO_INDEPENDENT==xfer_mode || H5FD_MPIO_COLLECTIVE==xfer_mode); - if (H5FD_MPIO_INDEPENDENT==xfer_mode) { + if (!use_view_this_time) { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) diff --git a/src/H5S.c b/src/H5S.c index f69f8f5..e1d0327 100644 --- a/src/H5S.c +++ b/src/H5S.c @@ -1478,7 +1478,11 @@ H5S_find (const H5S_t *mem_space, const H5S_t *file_space, unsigned #ifndef H5_HAVE_PARALLEL UNUSED #endif /* H5_HAVE_PARALLEL */ -flags) +flags, hbool_t +#ifndef H5_HAVE_PARALLEL +UNUSED +#endif /* H5_HAVE_PARALLEL */ +*use_par_opt_io) { H5S_conv_t *path=NULL; /* Space conversion path */ #ifdef H5_HAVE_PARALLEL @@ -1521,10 +1525,17 @@ flags) /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { + /* Set the pointers to the MPI-specific routines */ H5S_conv_g[i]->read = H5S_mpio_spaces_read; H5S_conv_g[i]->write = H5S_mpio_spaces_write; + + /* Indicate that the I/O will be parallel */ + *use_par_opt_io=TRUE; } /* end if */ else { + /* Indicate that the I/O will _NOT_ be parallel */ + *use_par_opt_io=FALSE; + #endif /* H5_HAVE_PARALLEL */ H5S_conv_g[i]->read = H5S_select_read; H5S_conv_g[i]->write = H5S_select_write; @@ -1556,10 +1567,17 @@ flags) /* Check if we can use the optimized parallel I/O routines */ if(opt==TRUE) { + /* Set the pointers to the MPI-specific routines */ path->read = H5S_mpio_spaces_read; path->write = H5S_mpio_spaces_write; + + /* Indicate that the I/O will be parallel */ + *use_par_opt_io=TRUE; } /* end if */ else { + /* Indicate that the I/O will _NOT_ be parallel */ + *use_par_opt_io=FALSE; + #endif /* H5_HAVE_PARALLEL */ path->read = H5S_select_read; path->write = H5S_select_write; diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h index 389b3b1..46d302f 100644 --- a/src/H5Sprivate.h +++ b/src/H5Sprivate.h @@ -164,7 +164,7 @@ typedef struct 
H5S_conv_t { H5_DLL H5S_t *H5S_copy(const H5S_t *src); H5_DLL herr_t H5S_close(H5S_t *ds); H5_DLL H5S_conv_t *H5S_find(const H5S_t *mem_space, const H5S_t *file_space, - unsigned flags); + unsigned flags, hbool_t *use_par_opt_io); H5_DLL H5S_class_t H5S_get_simple_extent_type(const H5S_t *ds); H5_DLL hssize_t H5S_get_simple_extent_npoints(const H5S_t *ds); H5_DLL hsize_t H5S_get_npoints_max(const H5S_t *ds); -- cgit v0.12