diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2008-04-24 15:03:41 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2008-04-24 15:03:41 (GMT) |
commit | 495ca9c7bb19553d2c87ce68013f1de2dff3d54b (patch) | |
tree | 792e1a9ecc8aa314dfa3d0538464e4f87ad55cf5 /src/H5FDmpio.c | |
parent | 16d4cae5b16ffb91298d8d232214afeb5112da6d (diff) | |
download | hdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.zip hdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.tar.gz hdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.tar.bz2 |
[svn-r14860] Description:
Omnibus raw data I/O revisions, with wide-ranging changes and
refactoring, in order to prepare for implementing "fast append" feature.
These changes remove the majority of the code duplication for raw data
I/O which has crept in over the last ten years and introduces a more object-
oriented design for operating on different types of dataset storage.
Chunked storage no longer has it's own I/O routines, it is now handled
as either contiguous (if chunk is not pulled into the cache) or compact (if the
chunk is cached in memory).
No bug or feature changes, at least intentionally... :-)
Tested on:
FreeBSD/32 6.2 (duty) in debug mode
FreeBSD/64 6.2 (liberty) w/C++ & FORTRAN, in debug mode
Linux/32 2.6 (kagiso) w/PGI compilers, w/C++ & FORTRAN, w/threadsafe,
in debug mode
Linux/64-amd64 2.6 (smirom) w/default API=1.6.x, w/C++ & FORTRAN,
in production mode
Linux/64-ia64 2.6 (cobalt) w/Intel compilers, w/C++ & FORTRAN,
in production mode
Solaris/32 2.10 (linew) w/deprecated symbols disabled, w/C++ & FORTRAN,
w/szip filter, in production mode
Mac OS X/32 10.5.2 (amazon) in debug mode
Linux/64-ia64 2.4 (tg-login3) w/parallel, w/FORTRAN, in production mode
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- | src/H5FDmpio.c | 188 |
1 files changed, 61 insertions, 127 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index c8087bb..89a8637 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -572,15 +572,15 @@ H5Pset_dxpl_mpio_collective_opt(hid_t dxpl_id, H5FD_mpio_collective_opt_t opt_mo HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl") /* Set the transfer mode */ - if (H5P_set(plist,H5D_XFER_MPIO_COLLECTIVE_OPT_NAME,&opt_mode)<0) + if(H5P_set(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME, &opt_mode) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value") /* Initialize driver-specific properties */ - ret_value= H5P_set_driver(plist, H5FD_MPIO, NULL); + ret_value = H5P_set_driver(plist, H5FD_MPIO, NULL); done: FUNC_LEAVE_API(ret_value) -} +} /* end H5Pset_dxpl_mpio_collective_opt() */ /*------------------------------------------------------------------------- @@ -1413,8 +1413,8 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add int type_size; /* MPI datatype used for I/O's size */ int io_size; /* Actual number of bytes requested */ H5P_genplist_t *plist; /* Property list pointer */ - unsigned use_view_this_time=0; - herr_t ret_value=SUCCEED; + hbool_t use_view_this_time = FALSE; + herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(H5FD_mpio_read, FAIL) @@ -1464,7 +1464,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add MPI_Datatype file_type; /* Remember that views are used */ - use_view_this_time=TRUE; + use_view_this_time = TRUE; /* prepare for a full-blown xfer using btype, ftype, and disp */ if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) @@ -1487,9 +1487,9 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add } /* end if */ /* Read the data. */ - if (use_view_this_time) { + if(use_view_this_time) { H5FD_mpio_collective_opt_t coll_opt_mode; - H5FD_mpio_collective_opt_t xfer_opt_mode; + #ifdef H5FDmpio_DEBUG if (H5FD_mpio_Debug[(int)'t']) fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n"); @@ -1497,28 +1497,23 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add /* Peek the collective_opt property to check whether the application wants to do IO individually. */ coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME); - /* Peek the xfer_opt_mode property to check whether the application wants to do IO individually. */ - xfer_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_OPT_MODE_NAME); - - if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO && xfer_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { + if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) - fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n"); + if(H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n"); #endif -/* Temporarily change to read_at_all - if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))*/ - if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat ))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) - } + if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) + } /* end if */ else { #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) - fprintf(stdout, "H5FD_mpio_read: doing MPI independent IO\n"); + if(H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "H5FD_mpio_read: doing MPI independent IO\n"); #endif - if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat ))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) - } + if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) + } /* end else */ /* * Reset the file view when we used MPI derived types @@ -1700,7 +1695,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, int size_i, bytes_written; int type_size; /* MPI datatype used for I/O's size */ int io_size; /* Actual number of bytes requested */ - unsigned use_view_this_time=0; + hbool_t use_view_this_time = FALSE; H5P_genplist_t *plist; /* Property list pointer */ herr_t ret_value=SUCCEED; @@ -1718,26 +1713,25 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, assert(buf); /* Portably initialize MPI status variable */ - HDmemset(&mpi_stat,0,sizeof(MPI_Status)); + HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); /* some numeric conversions */ - if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off)<0) + if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") size_i = (int)size; - if ((hsize_t)size_i != size) + if((hsize_t)size_i != size) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i") #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'w']) - fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n", - (long)mpi_off, size_i); + if(H5FD_mpio_Debug[(int)'w']) + fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n", (long)mpi_off, size_i); #endif /* Obtain the data transfer properties */ if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list") - if(type==H5FD_MEM_DRAW) { + if(type == H5FD_MEM_DRAW) { H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */ /* Obtain the data transfer properties */ @@ -1749,114 +1743,71 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, * us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which * could mean "use MPI_BYTE" by convention). */ - if(xfer_mode==H5FD_MPIO_COLLECTIVE) { + if(xfer_mode == H5FD_MPIO_COLLECTIVE) { MPI_Datatype file_type; /* Remember that views are used */ - use_view_this_time=TRUE; + use_view_this_time = TRUE; /* prepare for a full-blown xfer using btype, ftype, and disp */ - if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0) + if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") - if(H5P_get(plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME,&file_type)<0) + if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0) HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property") /* * Set the file view when we are using MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) + if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) /* When using types, use the address as the displacement for * MPI_File_set_view and reset the address for the read to zero */ - mpi_off=0; + mpi_off = 0; } /* end if */ } /* end if */ else { - unsigned block_before_meta_write=0; /* Whether to block before a metadata write */ - - /* Check if we need to syncronize all processes before attempting metadata write - * (Prevents race condition where the process writing the metadata goes ahead - * and writes the metadata to the file before all the processes have - * read the data, "transmitting" data from the "future" to the reading - * process. -QAK ) - * - * The only time we don't want to block before a metadata write is when - * we are flushing out a bunch of metadata. Then, we block before the - * first write and don't block for further writes in the sequence. - */ - if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0) - if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0) - HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property") - -#if 0 /* JRM */ - /* The metadata cache now only writes from process 0, which makes - * this synchronization incorrect. I'm leaving this code commented - * out instead of deleting it to remind us that we should re-write - * this function so that a metadata write from any other process - * should flag an error. - * -- JRM 9/1/05 - */ - if(block_before_meta_write) - if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code) -#endif /* JRM */ - - /* Only one process will do the actual write if all procs in comm write same metadata */ - if (file->mpi_rank != H5_PAR_META_WRITE) { -#ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'w']) { - fprintf(stdout, - " proc %d: in H5FD_mpio_write (write omitted)\n", - file->mpi_rank ); - } -#endif + /* Only one process can do the actual metadata write */ + if(file->mpi_rank != H5_PAR_META_WRITE) +#ifdef LATER + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "can't write metadata from non-zero rank") +#else /* LATER */ HGOTO_DONE(SUCCEED) /* skip the actual write */ - } /* end if */ +#endif /* LATER */ } /* end if */ /* Write the data. */ - if (use_view_this_time) { - H5FD_mpio_collective_opt_t coll_opt_mode; - H5FD_mpio_collective_opt_t xfer_opt_mode; + if(use_view_this_time) { + H5FD_mpio_collective_opt_t coll_opt_mode; + #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) + if(H5FD_mpio_Debug[(int)'t']) fprintf(stdout, "H5FD_mpio_write: using MPIO collective mode\n"); #endif /* Peek the collective_opt property to check whether the application wants to do IO individually. */ - coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist,H5D_XFER_MPIO_COLLECTIVE_OPT_NAME); - - /* Peek the xfer_opt_mode property to check whether the application wants to do IO individually. */ - xfer_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist,H5D_XFER_IO_XFER_OPT_MODE_NAME); + coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME); /*OKAY: CAST DISCARDS CONST QUALIFIER*/ - if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO && xfer_opt_mode == H5FD_MPIO_COLLECTIVE_IO ) { + if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) - fprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n"); + if(H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n"); #endif - /* Temporarily change to _at -if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) -*/ - if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) - } + if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) + } /* end if */ else { #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) - fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n"); + if(H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n"); #endif - - if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) - HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) - } - + if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) + } /* end else */ - /* - * Reset the file view when we used MPI derived types - */ + /* Reset the file view when we used MPI derived types */ /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info))) HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) @@ -1872,48 +1823,31 @@ if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, * datatype in this call though... (We aren't because using it causes * the LANL "qsc" machine to dump core - 12/19/03) - QAK] */ - if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written))) + if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written))) HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) /* Get the type's size */ - if (MPI_SUCCESS != (mpi_code=MPI_Type_size(buf_type,&type_size))) + if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code) /* Compute the actual number of bytes requested */ - io_size=type_size*size_i; + io_size = type_size * size_i; /* Check for write failure */ - if (bytes_written != io_size) + if(bytes_written != io_size) HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed") /* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */ file->eof = HADDR_UNDEF; done: - -#if 0 /* JRM */ - /* Since metadata writes are now done by process 0 only, this broadcast - * is no longer needed. I leave it in and commented out to remind us - * that we need to re-work this function to reflect this reallity. - * - * -- JRM 9/1/05 - */ - /* if only one process writes, need to broadcast the ret_value to - * other processes - */ - if(type != H5FD_MEM_DRAW) { - if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&ret_value, (int)sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) - HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code) - } /* end if */ -#endif /* JRM */ - #ifdef H5FDmpio_DEBUG - if (H5FD_mpio_Debug[(int)'t']) + if(H5FD_mpio_Debug[(int)'t']) fprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n", file->mpi_rank, ret_value ); #endif FUNC_LEAVE_NOAPI(ret_value) -} +} /* end H5FD_mpio_write() */ /*------------------------------------------------------------------------- |