summaryrefslogtreecommitdiffstats
path: root/src/H5Distore.c
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2002-05-17 12:53:46 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2002-05-17 12:53:46 (GMT)
commita6b4cba798a494dea1d29474cc5658f7003615d9 (patch)
tree5ffa6f7b9868849e81a6392b29ad59ec9218dfe1 /src/H5Distore.c
parent567c04276158059089d64e0e9fd5b9c7e1b8d7ba (diff)
downloadhdf5-a6b4cba798a494dea1d29474cc5658f7003615d9.zip
hdf5-a6b4cba798a494dea1d29474cc5658f7003615d9.tar.gz
hdf5-a6b4cba798a494dea1d29474cc5658f7003615d9.tar.bz2
[svn-r5429] Purpose:
Bug fix/Code improvement. Description: Currently, the chunk data allocation routine invoked to allocate space for the entire dataset is inefficient. It writes out each chunk in the dataset, whether it is already allocated or not. Additionally, this happens not only when it is created, but also anytime it is opened for writing, or the dataset is extended. Worse, there's too much parallel I/O synchronization, which slows things down even more. Solution: Only attempt to write out chunks that don't already exist. Additionally, share the I/O writing between all the nodes, instead of writing everything with process 0. Then, only block with MPI_Barrier if chunks were actually created. Platforms tested: IRIX64 6.5 (modi4)
Diffstat (limited to 'src/H5Distore.c')
-rw-r--r--src/H5Distore.c167
1 file changed, 102 insertions, 65 deletions
diff --git a/src/H5Distore.c b/src/H5Distore.c
index f5bee8a..1d7feeb 100644
--- a/src/H5Distore.c
+++ b/src/H5Distore.c
@@ -1037,16 +1037,10 @@ H5F_istore_flush_entry(H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset)
* Create the chunk it if it doesn't exist, or reallocate the chunk if
* its size changed. Then write the data into the file.
*/
- if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios,
- &udata)<0) {
- HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,
- "unable to allocate chunk");
- }
- if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT,
- buf)<0) {
- HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,
- "unable to write raw data to file");
- }
+ if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios, &udata)<0)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to allocate chunk");
+ if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT, buf)<0)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file");
/* Mark cache entry as clean */
ent->dirty = FALSE;
@@ -2413,21 +2407,32 @@ done:
* Robb Matzke, 1999-08-02
* The split_ratios are passed in as part of the data transfer
* property list.
+ *
+ * Quincey Koziol, 2002-05-16
+ * Rewrote algorithm to allocate & write blocks without using
+ * lock/unlock code.
*-------------------------------------------------------------------------
*/
+#ifdef H5_HAVE_PARALLEL
herr_t
H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
const hsize_t *space_dim, H5P_genplist_t *dc_plist)
{
-
- int i, carry;
- unsigned u;
- hssize_t chunk_offset[H5O_LAYOUT_NDIMS];
- uint8_t *chunk=NULL;
- unsigned idx_hint=0;
- hsize_t chunk_size;
- H5O_pline_t pline; /* I/O pipeline information */
- H5O_fill_t fill; /* Fill value information */
+ hssize_t chunk_offset[H5O_LAYOUT_NDIMS]; /* Offset of current chunk */
+ hsize_t chunk_size; /* Size of chunk in bytes */
+ H5O_pline_t pline; /* I/O pipeline information */
+ H5O_fill_t fill; /* Fill value information */
+ H5F_istore_ud1_t udata; /* B-tree pass-through for creating chunk */
+ void *chunk=NULL; /* Chunk buffer for writing fill values */
+ H5P_genplist_t *dx_plist; /* Data xfer property list */
+ double split_ratios[3];/* B-tree node splitting ratios */
+ int mpi_rank; /* This process's rank */
+ int mpi_size; /* Total # of processes */
+ int mpi_round=0; /* Current process responsible for I/O */
+ unsigned chunk_allocated=0; /* Flag to indicate that chunk was actually allocated */
+ int carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */
+ int i; /* Local index variable */
+ unsigned u; /* Local index variable */
herr_t ret_value=SUCCEED; /* Return value */
FUNC_ENTER(H5F_istore_allocate, FAIL);
@@ -2438,52 +2443,86 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
assert(layout && H5D_CHUNKED==layout->type);
assert(layout->ndims>0 && layout->ndims<=H5O_LAYOUT_NDIMS);
assert(H5F_addr_defined(layout->addr));
+ assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
+ assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
+ assert(dc_plist!=NULL);
- /* Get necessary properties from property list */
+ /* Get necessary properties from dataset creation property list */
if(H5P_get(dc_plist, H5D_CRT_FILL_VALUE_NAME, &fill) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get fill value");
+ HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get fill value");
if(H5P_get(dc_plist, H5D_CRT_DATA_PIPELINE_NAME, &pline) < 0)
- HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get data pipeline");
+ HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get data pipeline");
+
+ /* Get necessary properties from dataset transfer property list */
+ if (TRUE!=H5P_isa_class(dxpl_id,H5P_DATASET_XFER) || NULL == (dx_plist = H5I_object(dxpl_id)))
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list");
+ if(H5P_get(dx_plist,H5D_XFER_BTREE_SPLIT_RATIO_NAME,split_ratios)<0)
+ HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get B-tree split ratios");
+
+ /* Can't use data I/O pipeline in parallel (yet) */
+ if (pline.nfilters>0)
+ HGOTO_ERROR(H5E_STORAGE, H5E_UNSUPPORTED, FAIL, "can't use data pipeline in parallel");
/*
* Setup indice to go through all chunks. (Future improvement
* should allocate only chunks that have no file space assigned yet.
*/
for (u=0, chunk_size=1; u<layout->ndims; u++) {
- chunk_offset[u]=0;
+ chunk_offset[u] = 0;
chunk_size *= layout->dim[u];
} /* end for */
- /* Loop over all chunks */
- carry=0;
- while (carry==0) {
- /* No file space assigned yet. Allocate it. */
- /* The following needs improvement like calling the */
- /* allocation directly rather than indirectly using the */
- /* allocation effect in the unlock process. */
+ /* Allocate chunk buffer for processes to use when writing fill values */
+ if (NULL==(chunk = H5F_istore_chunk_alloc(chunk_size)))
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for chunk");
+ /* Fill the chunk with the proper values */
+ if(fill.buf) {
+ /*
+ * Replicate the fill value throughout the chunk.
+ */
+ assert(0==chunk_size % fill.size);
+ H5V_array_fill(chunk, fill.buf, fill.size, chunk_size/fill.size);
+ } else {
/*
- * Lock the chunk, copy from application to chunk, then unlock the
- * chunk.
+ * No fill value was specified, assume all zeros.
*/
+ HDmemset (chunk, 0, chunk_size);
+ } /* end else */
-#ifdef H5_HAVE_PARALLEL
- /* rky 981207 Serialize access to this critical region. */
- if (SUCCEED!= H5FD_mpio_wait_for_left_neighbor(f->shared->lf))
- HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to lock the data chunk");
-#endif
- if (NULL==(chunk=H5F_istore_lock(f, dxpl_id, layout, &pline,
- &fill, chunk_offset, FALSE, &idx_hint)))
- HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to read raw data chunk");
-
- H5_CHECK_OVERFLOW(chunk_size,hsize_t,size_t);
- if (H5F_istore_unlock(f, dxpl_id, layout, &pline, TRUE,
- chunk_offset, &idx_hint, chunk, (size_t)chunk_size)<0)
- HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "uanble to unlock raw data chunk");
-#ifdef H5_HAVE_PARALLEL
- if (SUCCEED!= H5FD_mpio_signal_right_neighbor(f->shared->lf))
- HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to unlock the data chunk");
-#endif
+ /* Retrieve up MPI parameters */
+ if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank");
+ if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size");
+
+ /* Loop over all chunks */
+ carry=0;
+ while (carry==0) {
+ /* Check if the chunk exists yet */
+ if(H5F_istore_get_addr(f,layout,chunk_offset)==HADDR_UNDEF) {
+ /* Initialize the chunk information */
+ udata.mesg = *layout;
+ udata.key.filter_mask = 0;
+ udata.addr = HADDR_UNDEF;
+ udata.key.nbytes = chunk_size;
+ for (u=0; u<layout->ndims; u++)
+ udata.key.offset[u] = chunk_offset[u];
+
+ /* Allocate the chunk with all processes */
+ if (H5B_insert(f, H5B_ISTORE, layout->addr, split_ratios, &udata)<0)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to allocate chunk");
+
+ /* Round-robin write the chunks out from only one process */
+ if(mpi_round==mpi_rank) {
+ if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, dxpl_id, chunk)<0)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file");
+ } /* end if */
+ mpi_round=(++mpi_round)%mpi_size;
+
+ /* Indicate that a chunk was allocated */
+ chunk_allocated=1;
+ } /* end if */
/* Increment indices */
for (i=layout->ndims-1, carry=1; i>=0 && carry; --i) {
@@ -2496,24 +2535,24 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout,
} /* end for */
} /* end while */
-#ifdef H5_HAVE_PARALLEL
- /*
- * rky 980923
- *
- * The following barrier is a temporary fix to prevent overwriting real
- * data caused by a race between one proc's call of H5F_istore_allocate
- * (from H5D_init_storage, ultimately from H5Dcreate and H5Dextend) and
- * another proc's call of H5Dwrite. Eventually, this barrier should be
- * removed, when H5D_init_storage is changed to call H5MF_alloc directly
- * to allocate space, instead of calling H5F_istore_unlock.
- */
- if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
- HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
-#endif
+ /* Only need to block at the barrier if we actually allocated a chunk */
+ if(chunk_allocated) {
+ /* Wait at barrier to avoid race conditions where some processes are
+ * still writing out chunks and other processes race ahead to read
+ * them in, getting bogus data.
+ */
+ if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf)))
+ HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed");
+ } /* end if */
done:
+ /* Free the chunk for fill values */
+ if(chunk!=NULL)
+ H5F_istore_chunk_free(chunk);
+
FUNC_LEAVE(ret_value);
}
+#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
@@ -2824,7 +2863,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout,
hsize_t idx_max[H5O_LAYOUT_NDIMS];
hsize_t sub_size[H5O_LAYOUT_NDIMS];
hsize_t naccessed; /*bytes accessed in chunk */
- hsize_t elm_size; /*size of an element in bytes */
hsize_t end_chunk; /*chunk position counter */
hssize_t start[H5O_LAYOUT_NDIMS]; /*starting location of hyperslab */
hsize_t count[H5O_LAYOUT_NDIMS]; /*element count of hyperslab */
@@ -2866,7 +2904,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout,
for(i = 0; i < rank; i++)
size[i] = curr_dims[i];
size[i] = layout->dim[i];
- elm_size = size[i];
/* Default dataset transfer property list */
dxpl_id = H5P_DATASET_XFER_DEFAULT;