summaryrefslogtreecommitdiffstats
path: root/src/H5FDmpio.c
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2008-04-24 15:03:41 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2008-04-24 15:03:41 (GMT)
commit495ca9c7bb19553d2c87ce68013f1de2dff3d54b (patch)
tree792e1a9ecc8aa314dfa3d0538464e4f87ad55cf5 /src/H5FDmpio.c
parent16d4cae5b16ffb91298d8d232214afeb5112da6d (diff)
downloadhdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.zip
hdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.tar.gz
hdf5-495ca9c7bb19553d2c87ce68013f1de2dff3d54b.tar.bz2
[svn-r14860] Description:
Omnibus raw data I/O revisions, with wide-ranging changes and refactoring, in order to prepare for implementing "fast append" feature. These changes remove the majority of the code duplication for raw data I/O which has crept in over the last ten years and introduces a more object- oriented design for operating on different types of dataset storage. Chunked storage no longer has it's own I/O routines, it is now handled as either contiguous (if chunk is not pulled into the cache) or compact (if the chunk is cached in memory). No bug or feature changes, at least intentionally... :-) Tested on: FreeBSD/32 6.2 (duty) in debug mode FreeBSD/64 6.2 (liberty) w/C++ & FORTRAN, in debug mode Linux/32 2.6 (kagiso) w/PGI compilers, w/C++ & FORTRAN, w/threadsafe, in debug mode Linux/64-amd64 2.6 (smirom) w/default API=1.6.x, w/C++ & FORTRAN, in production mode Linux/64-ia64 2.6 (cobalt) w/Intel compilers, w/C++ & FORTRAN, in production mode Solaris/32 2.10 (linew) w/deprecated symbols disabled, w/C++ & FORTRAN, w/szip filter, in production mode Mac OS X/32 10.5.2 (amazon) in debug mode Linux/64-ia64 2.4 (tg-login3) w/parallel, w/FORTRAN, in production mode
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r--src/H5FDmpio.c188
1 files changed, 61 insertions, 127 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index c8087bb..89a8637 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -572,15 +572,15 @@ H5Pset_dxpl_mpio_collective_opt(hid_t dxpl_id, H5FD_mpio_collective_opt_t opt_mo
HGOTO_ERROR(H5E_PLIST, H5E_BADTYPE, FAIL, "not a dxpl")
/* Set the transfer mode */
- if (H5P_set(plist,H5D_XFER_MPIO_COLLECTIVE_OPT_NAME,&opt_mode)<0)
+ if(H5P_set(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME, &opt_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "unable to set value")
/* Initialize driver-specific properties */
- ret_value= H5P_set_driver(plist, H5FD_MPIO, NULL);
+ ret_value = H5P_set_driver(plist, H5FD_MPIO, NULL);
done:
FUNC_LEAVE_API(ret_value)
-}
+} /* end H5Pset_dxpl_mpio_collective_opt() */
/*-------------------------------------------------------------------------
@@ -1413,8 +1413,8 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
int type_size; /* MPI datatype used for I/O's size */
int io_size; /* Actual number of bytes requested */
H5P_genplist_t *plist; /* Property list pointer */
- unsigned use_view_this_time=0;
- herr_t ret_value=SUCCEED;
+ hbool_t use_view_this_time = FALSE;
+ herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_mpio_read, FAIL)
@@ -1464,7 +1464,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
MPI_Datatype file_type;
/* Remember that views are used */
- use_view_this_time=TRUE;
+ use_view_this_time = TRUE;
/* prepare for a full-blown xfer using btype, ftype, and disp */
if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
@@ -1487,9 +1487,9 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
} /* end if */
/* Read the data. */
- if (use_view_this_time) {
+ if(use_view_this_time) {
H5FD_mpio_collective_opt_t coll_opt_mode;
- H5FD_mpio_collective_opt_t xfer_opt_mode;
+
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'t'])
fprintf(stdout, "H5FD_mpio_read: using MPIO collective mode\n");
@@ -1497,28 +1497,23 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
/* Peek the collective_opt property to check whether the application wants to do IO individually. */
coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME);
- /* Peek the xfer_opt_mode property to check whether the application wants to do IO individually. */
- xfer_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_IO_XFER_OPT_MODE_NAME);
-
- if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO && xfer_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
+ if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n");
+ if(H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_read: doing MPI collective IO\n");
#endif
-/* Temporarily change to read_at_all
- if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))*/
- if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
- }
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
+ } /* end if */
else {
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "H5FD_mpio_read: doing MPI independent IO\n");
+ if(H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_read: doing MPI independent IO\n");
#endif
- if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
- }
+ if(MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
+ } /* end else */
/*
* Reset the file view when we used MPI derived types
@@ -1700,7 +1695,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
int size_i, bytes_written;
int type_size; /* MPI datatype used for I/O's size */
int io_size; /* Actual number of bytes requested */
- unsigned use_view_this_time=0;
+ hbool_t use_view_this_time = FALSE;
H5P_genplist_t *plist; /* Property list pointer */
herr_t ret_value=SUCCEED;
@@ -1718,26 +1713,25 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
assert(buf);
/* Portably initialize MPI status variable */
- HDmemset(&mpi_stat,0,sizeof(MPI_Status));
+ HDmemset(&mpi_stat, 0, sizeof(MPI_Status));
/* some numeric conversions */
- if (H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off)<0)
+ if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
size_i = (int)size;
- if ((hsize_t)size_i != size)
+ if((hsize_t)size_i != size)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'w'])
- fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n",
- (long)mpi_off, size_i);
+ if(H5FD_mpio_Debug[(int)'w'])
+ fprintf(stdout, "in H5FD_mpio_write mpi_off=%ld size_i=%d\n", (long)mpi_off, size_i);
#endif
/* Obtain the data transfer properties */
if(NULL == (plist = (H5P_genplist_t *)H5I_object(dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list")
- if(type==H5FD_MEM_DRAW) {
+ if(type == H5FD_MEM_DRAW) {
H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */
/* Obtain the data transfer properties */
@@ -1749,114 +1743,71 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
* us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
* could mean "use MPI_BYTE" by convention).
*/
- if(xfer_mode==H5FD_MPIO_COLLECTIVE) {
+ if(xfer_mode == H5FD_MPIO_COLLECTIVE) {
MPI_Datatype file_type;
/* Remember that views are used */
- use_view_this_time=TRUE;
+ use_view_this_time = TRUE;
/* prepare for a full-blown xfer using btype, ftype, and disp */
- if(H5P_get(plist,H5FD_MPI_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
+ if(H5P_get(plist, H5FD_MPI_XFER_MEM_MPI_TYPE_NAME, &buf_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
- if(H5P_get(plist,H5FD_MPI_XFER_FILE_MPI_TYPE_NAME,&file_type)<0)
+ if(H5P_get(plist, H5FD_MPI_XFER_FILE_MPI_TYPE_NAME, &file_type) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property")
/*
* Set the file view when we are using MPI derived types
*/
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
+ if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
/* When using types, use the address as the displacement for
* MPI_File_set_view and reset the address for the read to zero
*/
- mpi_off=0;
+ mpi_off = 0;
} /* end if */
} /* end if */
else {
- unsigned block_before_meta_write=0; /* Whether to block before a metadata write */
-
- /* Check if we need to syncronize all processes before attempting metadata write
- * (Prevents race condition where the process writing the metadata goes ahead
- * and writes the metadata to the file before all the processes have
- * read the data, "transmitting" data from the "future" to the reading
- * process. -QAK )
- *
- * The only time we don't want to block before a metadata write is when
- * we are flushing out a bunch of metadata. Then, we block before the
- * first write and don't block for further writes in the sequence.
- */
- if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0)
- if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property")
-
-#if 0 /* JRM */
- /* The metadata cache now only writes from process 0, which makes
- * this synchronization incorrect. I'm leaving this code commented
- * out instead of deleting it to remind us that we should re-write
- * this function so that a metadata write from any other process
- * should flag an error.
- * -- JRM 9/1/05
- */
- if(block_before_meta_write)
- if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code)
-#endif /* JRM */
-
- /* Only one process will do the actual write if all procs in comm write same metadata */
- if (file->mpi_rank != H5_PAR_META_WRITE) {
-#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'w']) {
- fprintf(stdout,
- " proc %d: in H5FD_mpio_write (write omitted)\n",
- file->mpi_rank );
- }
-#endif
+ /* Only one process can do the actual metadata write */
+ if(file->mpi_rank != H5_PAR_META_WRITE)
+#ifdef LATER
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "can't write metadata from non-zero rank")
+#else /* LATER */
HGOTO_DONE(SUCCEED) /* skip the actual write */
- } /* end if */
+#endif /* LATER */
} /* end if */
/* Write the data. */
- if (use_view_this_time) {
- H5FD_mpio_collective_opt_t coll_opt_mode;
- H5FD_mpio_collective_opt_t xfer_opt_mode;
+ if(use_view_this_time) {
+ H5FD_mpio_collective_opt_t coll_opt_mode;
+
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
+ if(H5FD_mpio_Debug[(int)'t'])
fprintf(stdout, "H5FD_mpio_write: using MPIO collective mode\n");
#endif
/* Peek the collective_opt property to check whether the application wants to do IO individually. */
- coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist,H5D_XFER_MPIO_COLLECTIVE_OPT_NAME);
-
- /* Peek the xfer_opt_mode property to check whether the application wants to do IO individually. */
- xfer_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist,H5D_XFER_IO_XFER_OPT_MODE_NAME);
+ coll_opt_mode = (H5FD_mpio_collective_opt_t)H5P_peek_unsigned(plist, H5D_XFER_MPIO_COLLECTIVE_OPT_NAME);
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
- if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO && xfer_opt_mode == H5FD_MPIO_COLLECTIVE_IO ) {
+ if(coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) {
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n");
+ if(H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_write: doing MPI collective IO\n");
#endif
- /* Temporarily change to _at
-if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
-*/
- if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
- }
+ if(MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
+ } /* end if */
else {
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
- fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n");
+ if(H5FD_mpio_Debug[(int)'t'])
+ fprintf(stdout, "H5FD_mpio_write: doing MPI independent IO\n");
#endif
-
- if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
- HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
- }
-
+ if(MPI_SUCCESS != (mpi_code = MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
+ } /* end else */
- /*
- * Reset the file view when we used MPI derived types
- */
+ /* Reset the file view when we used MPI derived types */
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if(MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, H5FD_mpi_native_g, file->info)))
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
@@ -1872,48 +1823,31 @@ if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf,
* datatype in this call though... (We aren't because using it causes
* the LANL "qsc" machine to dump core - 12/19/03) - QAK]
*/
- if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
/* Get the type's size */
- if (MPI_SUCCESS != (mpi_code=MPI_Type_size(buf_type,&type_size)))
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
/* Compute the actual number of bytes requested */
- io_size=type_size*size_i;
+ io_size = type_size * size_i;
/* Check for write failure */
- if (bytes_written != io_size)
+ if(bytes_written != io_size)
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed")
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
file->eof = HADDR_UNDEF;
done:
-
-#if 0 /* JRM */
- /* Since metadata writes are now done by process 0 only, this broadcast
- * is no longer needed. I leave it in and commented out to remind us
- * that we need to re-work this function to reflect this reallity.
- *
- * -- JRM 9/1/05
- */
- /* if only one process writes, need to broadcast the ret_value to
- * other processes
- */
- if(type != H5FD_MEM_DRAW) {
- if(MPI_SUCCESS != (mpi_code = MPI_Bcast(&ret_value, (int)sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
- HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
- } /* end if */
-#endif /* JRM */
-
#ifdef H5FDmpio_DEBUG
- if (H5FD_mpio_Debug[(int)'t'])
+ if(H5FD_mpio_Debug[(int)'t'])
fprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n",
file->mpi_rank, ret_value );
#endif
FUNC_LEAVE_NOAPI(ret_value)
-}
+} /* end H5FD_mpio_write() */
/*-------------------------------------------------------------------------