summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorRaymond Lu <songyulu@hdfgroup.org>2007-08-20 21:55:38 (GMT)
committerRaymond Lu <songyulu@hdfgroup.org>2007-08-20 21:55:38 (GMT)
commit6262a14f2e6f669f72e0212b4ce3654c9526f1dc (patch)
tree04eb74edc28e9abf1bfc1e9cf064291890b39c49 /src
parent2ab6b11aafaab9b09ba96781b19463e262990052 (diff)
downloadhdf5-6262a14f2e6f669f72e0212b4ce3654c9526f1dc.zip
hdf5-6262a14f2e6f669f72e0212b4ce3654c9526f1dc.tar.gz
hdf5-6262a14f2e6f669f72e0212b4ce3654c9526f1dc.tar.bz2
[svn-r14096] There are 3 changes in this checkin, as listed below:
1. In H5Dwrite and H5Dread, let the data buffer point to a fake address if the application passes in an empty buffer. This is mainly for MPIO programs in which some processes may not have any data to write or read but still participate in the I/O. This works around problems in some MPI implementations, such as ChaMPIon on tungsten, which do not support an empty buffer. 2. ChaMPIon on tungsten does not correctly support complex derived MPI datatypes, nor collective I/O when some processes have no data to write or read. Detect the compiler "cmpicc" in the system-specific config file and set the variables for these two cases to false. The PHDF5 library already has a way to switch collective chunked I/O to independent I/O in these two cases. 3. A bug fix - During the optimization work for compound data I/O, the case of switching collective chunked I/O to independent I/O was left out. Fixed it by adding I/O caching to that case in H5D_multi_chunk_collective_io in H5Dmpio.c. Tested on tungsten, cobalt, and kagiso for parallel; on linew and smirom for serial.
Diffstat (limited to 'src')
-rw-r--r--src/H5Dio.c24
-rw-r--r--src/H5Dmpio.c98
2 files changed, 104 insertions, 18 deletions
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 6e75a93..259eb06 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -289,6 +289,7 @@ H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
H5D_t *dset = NULL;
const H5S_t *mem_space = NULL;
const H5S_t *file_space = NULL;
+ char fake_char;
herr_t ret_value=SUCCEED; /* Return value */
FUNC_ENTER_API(H5Dread, FAIL)
@@ -326,6 +327,13 @@ H5Dread(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
if (!buf && H5S_GET_SELECT_NPOINTS(file_space)!=0)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no output buffer")
+ /* If the buffer is nil, and 0 element is selected, make a fake buffer.
+ * This is for some MPI package like ChaMPIon on NCSA's tungsten which
+ * doesn't support this feature.
+ */
+ if (!buf)
+ buf = &fake_char;
+
/* read raw data */
if (H5D_read(dset, mem_type_id, mem_space, file_space, plist_id, buf/*out*/) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "can't read data")
@@ -373,6 +381,7 @@ H5Dwrite(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
H5D_t *dset = NULL;
const H5S_t *mem_space = NULL;
const H5S_t *file_space = NULL;
+ char fake_char;
herr_t ret_value=SUCCEED; /* Return value */
FUNC_ENTER_API(H5Dwrite, FAIL)
@@ -409,6 +418,13 @@ H5Dwrite(hid_t dset_id, hid_t mem_type_id, hid_t mem_space_id,
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not xfer parms")
if(!buf && H5S_GET_SELECT_NPOINTS(file_space)!=0)
HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no output buffer")
+
+ /* If the buffer is nil, and 0 element is selected, make a fake buffer.
+ * This is for some MPI package like ChaMPIon on NCSA's tungsten which
+ * doesn't support this feature.
+ */
+ if (!buf)
+ buf = &fake_char;
/* write raw data */
if(H5D_write(dset, mem_type_id, mem_space, file_space, plist_id, buf) < 0)
@@ -788,6 +804,8 @@ H5D_contig_read(H5D_io_info_t *io_info, hsize_t nelmts,
FUNC_ENTER_NOAPI_NOINIT(H5D_contig_read)
+ assert (buf);
+
/* Initialize storage info for this dataset */
if (dataset->shared->dcpl_cache.efl.nused > 0)
HDmemcpy(&store.efl, &(dataset->shared->dcpl_cache.efl), sizeof(H5O_efl_t));
@@ -1066,6 +1084,8 @@ H5D_contig_write(H5D_io_info_t *io_info, hsize_t nelmts,
FUNC_ENTER_NOAPI_NOINIT(H5D_contig_write)
+ assert (buf);
+
/* Initialize storage info for this dataset */
if(dataset->shared->dcpl_cache.efl.nused > 0)
HDmemcpy(&store.efl, &(dataset->shared->dcpl_cache.efl), sizeof(H5O_efl_t));
@@ -1344,6 +1364,8 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_read)
+ assert (buf);
+
/* Map elements between file and memory for each chunk*/
if(H5D_create_chunk_map(dataset, mem_type, file_space, mem_space, &fm) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't build chunk mapping")
@@ -1734,6 +1756,8 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
FUNC_ENTER_NOAPI_NOINIT(H5D_chunk_write)
+ assert (buf);
+
/* Map elements between file and memory for each chunk*/
if(H5D_create_chunk_map(dataset, mem_type, file_space, mem_space, &fm) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't build chunk mapping")
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index 90f8974..a6981ee 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -988,6 +988,7 @@ H5D_link_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf,
for ( i = 0; i< num_chunk;i++){
if (MPI_SUCCESS != (mpi_code= MPI_Type_free( chunk_mtype+i )))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
+
if (MPI_SUCCESS != (mpi_code= MPI_Type_free( chunk_ftype+i )))
HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
}
@@ -1014,7 +1015,7 @@ H5D_link_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf,
if(H5DEBUG(D))
HDfprintf(H5DEBUG(D),"before coming to final collective IO\n");
#endif
-
+
if(H5D_final_collective_io(io_info,&chunk_final_ftype,&chunk_final_mtype,&coll_info,buf,do_write)<0)
HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,"couldn't finish MPI-IO");
@@ -1059,7 +1060,6 @@ done:
static herr_t
H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf, hbool_t do_write)
{
-
unsigned i, total_chunk;
hsize_t ori_total_chunk;
uint8_t *chunk_io_option;
@@ -1070,6 +1070,16 @@ H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf,
H5D_storage_t store; /* union of EFL and chunk pointer in file space */
hbool_t select_chunk;
hbool_t last_io_mode_coll = TRUE;
+
+ void *chunk = NULL; /* Pointer to the data chunk in cache */
+ H5D_t *dataset=io_info->dset;/* Local pointer to dataset info */
+ H5D_istore_ud1_t udata; /*B-tree pass-through */
+ haddr_t caddr; /* Address of the cached chunk */
+ size_t accessed_bytes; /*total accessed size in a chunk */
+ unsigned idx_hint=0; /* Cache index hint */
+ hbool_t dirty = TRUE; /* Flag for cache flushing */
+ hbool_t relax=TRUE; /* Whether whole chunk is selected */
+
herr_t ret_value = SUCCEED;
#ifdef H5Dmpio_DEBUG
int mpi_rank;
@@ -1167,18 +1177,41 @@ H5D_multi_chunk_collective_io(H5D_io_info_t *io_info,fm_map *fm,const void *buf,
if(H5D_ioinfo_make_ind(io_info) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
+ /* Load the chunk into cache. But if the whole chunk is written,
+ * simply allocate space instead of load the chunk. */
+ if(HADDR_UNDEF==(caddr = H5D_istore_get_addr(io_info, &udata)))
+ HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list");
+
+ if (H5D_istore_if_load(dataset, caddr)) {
+ accessed_bytes = chunk_info->chunk_points * H5T_get_size(dataset->shared->type);
+ if((do_write && (accessed_bytes != dataset->shared->layout.u.chunk.size)) || !do_write)
+ relax=FALSE;
+
+ if(NULL == (chunk = H5D_istore_lock(io_info, &udata, relax, &idx_hint)))
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk")
+ } else
+ chunk = NULL;
+
if(do_write) {
if((io_info->ops.write)(io_info,
chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type),
- chunk_info->fspace,chunk_info->mspace,0, NULL, buf) < 0)
+ chunk_info->fspace,chunk_info->mspace,caddr,chunk, buf) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
}
else {
if((io_info->ops.read)(io_info,
chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type),
- chunk_info->fspace,chunk_info->mspace,0, NULL, buf) < 0)
+ chunk_info->fspace,chunk_info->mspace,caddr,chunk, buf) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
- }
+ }
+
+ /* Release the cache lock on the chunk. */
+ if (H5D_istore_if_load(dataset, caddr)) {
+ if(!do_write) dirty = FALSE;
+
+ if(H5D_istore_unlock(io_info, dirty, idx_hint, chunk, accessed_bytes) < 0)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk")
+ }
#else
if(!last_io_mode_coll)
/* using independent I/O with file setview.*/
@@ -1250,6 +1283,16 @@ H5D_multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,fm_map *fm,const voi
haddr_t chunk_addr;
H5SL_node_t *chunk_node; /* Current node in chunk skip list */
H5D_storage_t store; /* union of EFL and chunk pointer in file space */
+ H5D_chunk_info_t *chunk_info; /* chunk information */
+ hbool_t make_ind, make_coll; /* Flags to indicate that the MPI mode should change */
+
+ void *chunk = NULL; /* Pointer to the data chunk in cache */
+ H5D_t *dataset=io_info->dset;/* Local pointer to dataset info */
+ H5D_istore_ud1_t udata; /*B-tree pass-through */
+ size_t accessed_bytes; /*total accessed size in a chunk */
+ unsigned idx_hint=0; /* Cache index hint */
+ hbool_t dirty = TRUE; /* Flag for cache flushing */
+ hbool_t relax=TRUE; /* Whether whole chunk is selected */
herr_t ret_value = SUCCEED;
#ifdef H5Dmpio_DEBUG
@@ -1317,24 +1360,43 @@ H5D_multi_chunk_collective_io_no_opt(H5D_io_info_t *io_info,fm_map *fm,const voi
if(H5D_ioinfo_make_ind(io_info) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't switch to independent I/O")
+ if(HADDR_UNDEF==(chunk_addr = H5D_istore_get_addr(io_info, &udata)))
+ HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list");
+
if(make_ind) {/*independent I/O */
-
- if(do_write) {
- if((io_info->ops.write)(io_info,
+ /* Load the chunk into cache. But if the whole chunk is written,
+ * simply allocate space instead of load the chunk. */
+ if (H5D_istore_if_load(dataset, chunk_addr)) {
+ accessed_bytes = chunk_info->chunk_points * H5T_get_size(dataset->shared->type);
+ if((do_write && (accessed_bytes != dataset->shared->layout.u.chunk.size)) || !do_write)
+ relax=FALSE;
+
+ if(NULL == (chunk = H5D_istore_lock(io_info, &udata, relax, &idx_hint)))
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk")
+ } else
+ chunk = NULL;
+
+ if(do_write) {
+ if((io_info->ops.write)(io_info,
chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type),
- chunk_info->fspace,chunk_info->mspace, (hsize_t)0, NULL, buf) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
- }
- else {
- if((io_info->ops.read)(io_info,
+ chunk_info->fspace,chunk_info->mspace, chunk_addr, chunk, buf) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "optimized write failed")
+ } else {
+ if((io_info->ops.read)(io_info,
chunk_info->chunk_points,H5T_get_size(io_info->dset->shared->type),
- chunk_info->fspace,chunk_info->mspace, (hsize_t)0, NULL, buf) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
- }
+ chunk_info->fspace,chunk_info->mspace, chunk_addr, chunk, buf) < 0)
+ HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "optimized read failed")
+ }
+
+ /* Release the cache lock on the chunk. */
+ if (H5D_istore_if_load(dataset, chunk_addr)) {
+ if(!do_write) dirty = FALSE;
+
+ if(H5D_istore_unlock(io_info, dirty, idx_hint, chunk, accessed_bytes) < 0)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk")
+ }
}
else { /*collective I/O */
- if(HADDR_UNDEF==(chunk_addr = H5D_istore_get_addr(io_info,NULL)))
- HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL,"couldn't get chunk info from skipped list");
if(H5D_inter_collective_io(io_info,chunk_info->fspace,chunk_info->mspace,
chunk_addr,buf,do_write ) < 0)
HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,"couldn't finish shared collective MPI-IO");