diff options
-rw-r--r-- | src/H5Distore.c | 167 | ||||
-rw-r--r-- | src/H5FDmpio.c | 56 | ||||
-rw-r--r-- | src/H5FDmpio.h | 2 | ||||
-rw-r--r-- | src/H5Fistore.c | 167 | ||||
-rw-r--r-- | src/H5Fpkg.h | 2 |
5 files changed, 264 insertions, 130 deletions
diff --git a/src/H5Distore.c b/src/H5Distore.c index f5bee8a..1d7feeb 100644 --- a/src/H5Distore.c +++ b/src/H5Distore.c @@ -1037,16 +1037,10 @@ H5F_istore_flush_entry(H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) * Create the chunk it if it doesn't exist, or reallocate the chunk if * its size changed. Then write the data into the file. */ - if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios, - &udata)<0) { - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, - "unable to allocate chunk"); - } - if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT, - buf)<0) { - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, - "unable to write raw data to file"); - } + if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios, &udata)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to allocate chunk"); + if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT, buf)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file"); /* Mark cache entry as clean */ ent->dirty = FALSE; @@ -2413,21 +2407,32 @@ done: * Robb Matzke, 1999-08-02 * The split_ratios are passed in as part of the data transfer * property list. + * + * Quincey Koziol, 2002-05-16 + * Rewrote algorithm to allocate & write blocks without using + * lock/unlock code. 
*------------------------------------------------------------------------- */ +#ifdef H5_HAVE_PARALLEL herr_t H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, const hsize_t *space_dim, H5P_genplist_t *dc_plist) { - - int i, carry; - unsigned u; - hssize_t chunk_offset[H5O_LAYOUT_NDIMS]; - uint8_t *chunk=NULL; - unsigned idx_hint=0; - hsize_t chunk_size; - H5O_pline_t pline; /* I/O pipeline information */ - H5O_fill_t fill; /* Fill value information */ + hssize_t chunk_offset[H5O_LAYOUT_NDIMS]; /* Offset of current chunk */ + hsize_t chunk_size; /* Size of chunk in bytes */ + H5O_pline_t pline; /* I/O pipeline information */ + H5O_fill_t fill; /* Fill value information */ + H5F_istore_ud1_t udata; /* B-tree pass-through for creating chunk */ + void *chunk=NULL; /* Chunk buffer for writing fill values */ + H5P_genplist_t *dx_plist; /* Data xfer property list */ + double split_ratios[3];/* B-tree node splitting ratios */ + int mpi_rank; /* This process's rank */ + int mpi_size; /* Total # of processes */ + int mpi_round=0; /* Current process responsible for I/O */ + unsigned chunk_allocated=0; /* Flag to indicate that chunk was actually allocated */ + int carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */ + int i; /* Local index variable */ + unsigned u; /* Local index variable */ herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER(H5F_istore_allocate, FAIL); @@ -2438,52 +2443,86 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, assert(layout && H5D_CHUNKED==layout->type); assert(layout->ndims>0 && layout->ndims<=H5O_LAYOUT_NDIMS); assert(H5F_addr_defined(layout->addr)); + assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id)); + assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER)); + assert(dc_plist!=NULL); - /* Get necessary properties from property list */ + /* Get necessary properties from dataset creation property list */ if(H5P_get(dc_plist, H5D_CRT_FILL_VALUE_NAME, &fill) < 0) - 
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get fill value"); + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get fill value"); if(H5P_get(dc_plist, H5D_CRT_DATA_PIPELINE_NAME, &pline) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get data pipeline"); + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get data pipeline"); + + /* Get necessary properties from dataset transfer property list */ + if (TRUE!=H5P_isa_class(dxpl_id,H5P_DATASET_XFER) || NULL == (dx_plist = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list"); + if(H5P_get(dx_plist,H5D_XFER_BTREE_SPLIT_RATIO_NAME,split_ratios)<0) + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get B-tree split ratios"); + + /* Can't use data I/O pipeline in parallel (yet) */ + if (pline.nfilters>0) + HGOTO_ERROR(H5E_STORAGE, H5E_UNSUPPORTED, FAIL, "can't use data pipeline in parallel"); /* * Setup indice to go through all chunks. (Future improvement * should allocate only chunks that have no file space assigned yet. */ for (u=0, chunk_size=1; u<layout->ndims; u++) { - chunk_offset[u]=0; + chunk_offset[u] = 0; chunk_size *= layout->dim[u]; } /* end for */ - /* Loop over all chunks */ - carry=0; - while (carry==0) { - /* No file space assigned yet. Allocate it. */ - /* The following needs improvement like calling the */ - /* allocation directly rather than indirectly using the */ - /* allocation effect in the unlock process. */ + /* Allocate chunk buffer for processes to use when writing fill values */ + if (NULL==(chunk = H5F_istore_chunk_alloc(chunk_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for chunk"); + /* Fill the chunk with the proper values */ + if(fill.buf) { + /* + * Replicate the fill value throughout the chunk. 
+ */ + assert(0==chunk_size % fill.size); + H5V_array_fill(chunk, fill.buf, fill.size, chunk_size/fill.size); + } else { /* - * Lock the chunk, copy from application to chunk, then unlock the - * chunk. + * No fill value was specified, assume all zeros. */ + HDmemset (chunk, 0, chunk_size); + } /* end else */ -#ifdef H5_HAVE_PARALLEL - /* rky 981207 Serialize access to this critical region. */ - if (SUCCEED!= H5FD_mpio_wait_for_left_neighbor(f->shared->lf)) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to lock the data chunk"); -#endif - if (NULL==(chunk=H5F_istore_lock(f, dxpl_id, layout, &pline, - &fill, chunk_offset, FALSE, &idx_hint))) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to read raw data chunk"); - - H5_CHECK_OVERFLOW(chunk_size,hsize_t,size_t); - if (H5F_istore_unlock(f, dxpl_id, layout, &pline, TRUE, - chunk_offset, &idx_hint, chunk, (size_t)chunk_size)<0) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "uanble to unlock raw data chunk"); -#ifdef H5_HAVE_PARALLEL - if (SUCCEED!= H5FD_mpio_signal_right_neighbor(f->shared->lf)) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to unlock the data chunk"); -#endif + /* Retrieve up MPI parameters */ + if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); + if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size"); + + /* Loop over all chunks */ + carry=0; + while (carry==0) { + /* Check if the chunk exists yet */ + if(H5F_istore_get_addr(f,layout,chunk_offset)==HADDR_UNDEF) { + /* Initialize the chunk information */ + udata.mesg = *layout; + udata.key.filter_mask = 0; + udata.addr = HADDR_UNDEF; + udata.key.nbytes = chunk_size; + for (u=0; u<layout->ndims; u++) + udata.key.offset[u] = chunk_offset[u]; + + /* Allocate the chunk with all processes */ + if (H5B_insert(f, H5B_ISTORE, layout->addr, split_ratios, &udata)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, 
FAIL, "unable to allocate chunk"); + + /* Round-robin write the chunks out from only one process */ + if(mpi_round==mpi_rank) { + if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, dxpl_id, chunk)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file"); + } /* end if */ + mpi_round=(mpi_round+1)%mpi_size; + + /* Indicate that a chunk was allocated */ + chunk_allocated=1; + } /* end if */ /* Increment indices */ for (i=layout->ndims-1, carry=1; i>=0 && carry; --i) { @@ -2496,24 +2535,24 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, } /* end for */ } /* end while */ -#ifdef H5_HAVE_PARALLEL - /* - * rky 980923 - * - * The following barrier is a temporary fix to prevent overwriting real - * data caused by a race between one proc's call of H5F_istore_allocate - * (from H5D_init_storage, ultimately from H5Dcreate and H5Dextend) and - * another proc's call of H5Dwrite. Eventually, this barrier should be - * removed, when H5D_init_storage is changed to call H5MF_alloc directly - * to allocate space, instead of calling H5F_istore_unlock. - */ - if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed"); -#endif + /* Only need to block at the barrier if we actually allocated a chunk */ + if(chunk_allocated) { + /* Wait at barrier to avoid race conditions where some processes are + * still writing out chunks and other processes race ahead to read + * them in, getting bogus data.
+ */ + if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf))) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed"); + } /* end if */ done: + /* Free the chunk for fill values */ + if(chunk!=NULL) + H5F_istore_chunk_free(chunk); + FUNC_LEAVE(ret_value); } +#endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- @@ -2824,7 +2863,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout, hsize_t idx_max[H5O_LAYOUT_NDIMS]; hsize_t sub_size[H5O_LAYOUT_NDIMS]; hsize_t naccessed; /*bytes accessed in chunk */ - hsize_t elm_size; /*size of an element in bytes */ hsize_t end_chunk; /*chunk position counter */ hssize_t start[H5O_LAYOUT_NDIMS]; /*starting location of hyperslab */ hsize_t count[H5O_LAYOUT_NDIMS]; /*element count of hyperslab */ @@ -2866,7 +2904,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout, for(i = 0; i < rank; i++) size[i] = curr_dims[i]; size[i] = layout->dim[i]; - elm_size = size[i]; /* Default dataset transfer property list */ dxpl_id = H5P_DATASET_XFER_DEFAULT; diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index f4ba335..3572390 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -449,6 +449,62 @@ H5FD_mpio_communicator(H5FD_t *_file) /*------------------------------------------------------------------------- + * Function: H5FD_mpio_mpi_rank + * + * Purpose: Returns the MPI rank for a process + * + * Return: Success: non-negative + * Failure: negative + * + * Programmer: Quincey Koziol + * Thursday, May 16, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +H5FD_mpio_mpi_rank(H5FD_t *_file) +{ + H5FD_mpio_t *file = (H5FD_mpio_t*)_file; + + FUNC_ENTER(H5FD_mpio_mpi_rank, FAIL); + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); + + FUNC_LEAVE(file->mpi_rank); +} /* end H5FD_mpio_mpi_rank() */ + + +/*------------------------------------------------------------------------- + * 
Function: H5FD_mpio_mpi_size + * + * Purpose: Returns the number of MPI processes + * + * Return: Success: non-negative + * Failure: negative + * + * Programmer: Quincey Koziol + * Thursday, May 16, 2002 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +int +H5FD_mpio_mpi_size(H5FD_t *_file) +{ + H5FD_mpio_t *file = (H5FD_mpio_t*)_file; + + FUNC_ENTER(H5FD_mpio_mpi_size, FAIL); + assert(file); + assert(H5FD_MPIO==file->pub.driver_id); + + FUNC_LEAVE(file->mpi_size); +} /* end H5FD_mpio_mpi_size() */ + + +/*------------------------------------------------------------------------- * Function: H5FD_mpio_setup * * Purpose: Set the buffer type BTYPE, file type FTYPE, and absolute base diff --git a/src/H5FDmpio.h b/src/H5FDmpio.h index 425a346..4750ef2 100644 --- a/src/H5FDmpio.h +++ b/src/H5FDmpio.h @@ -62,6 +62,8 @@ __DLL__ herr_t H5FD_mpio_setup(H5FD_t *_file, MPI_Datatype btype, MPI_Datatype f __DLL__ herr_t H5FD_mpio_wait_for_left_neighbor(H5FD_t *file); __DLL__ herr_t H5FD_mpio_signal_right_neighbor(H5FD_t *file); __DLL__ herr_t H5FD_mpio_closing(H5FD_t *file); +__DLL__ int H5FD_mpio_mpi_rank(H5FD_t *_file); +__DLL__ int H5FD_mpio_mpi_size(H5FD_t *_file); #ifdef __cplusplus } #endif diff --git a/src/H5Fistore.c b/src/H5Fistore.c index f5bee8a..1d7feeb 100644 --- a/src/H5Fistore.c +++ b/src/H5Fistore.c @@ -1037,16 +1037,10 @@ H5F_istore_flush_entry(H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) * Create the chunk it if it doesn't exist, or reallocate the chunk if * its size changed. Then write the data into the file.
*/ - if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios, - &udata)<0) { - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, - "unable to allocate chunk"); - } - if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT, - buf)<0) { - HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, - "unable to write raw data to file"); - } + if (H5B_insert(f, H5B_ISTORE, ent->layout->addr, ent->split_ratios, &udata)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to allocate chunk"); + if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, H5P_DATASET_XFER_DEFAULT, buf)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file"); /* Mark cache entry as clean */ ent->dirty = FALSE; @@ -2413,21 +2407,32 @@ done: * Robb Matzke, 1999-08-02 * The split_ratios are passed in as part of the data transfer * property list. + * + * Quincey Koziol, 2002-05-16 + * Rewrote algorithm to allocate & write blocks without using + * lock/unlock code. 
*------------------------------------------------------------------------- */ +#ifdef H5_HAVE_PARALLEL herr_t H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, const hsize_t *space_dim, H5P_genplist_t *dc_plist) { - - int i, carry; - unsigned u; - hssize_t chunk_offset[H5O_LAYOUT_NDIMS]; - uint8_t *chunk=NULL; - unsigned idx_hint=0; - hsize_t chunk_size; - H5O_pline_t pline; /* I/O pipeline information */ - H5O_fill_t fill; /* Fill value information */ + hssize_t chunk_offset[H5O_LAYOUT_NDIMS]; /* Offset of current chunk */ + hsize_t chunk_size; /* Size of chunk in bytes */ + H5O_pline_t pline; /* I/O pipeline information */ + H5O_fill_t fill; /* Fill value information */ + H5F_istore_ud1_t udata; /* B-tree pass-through for creating chunk */ + void *chunk=NULL; /* Chunk buffer for writing fill values */ + H5P_genplist_t *dx_plist; /* Data xfer property list */ + double split_ratios[3];/* B-tree node splitting ratios */ + int mpi_rank; /* This process's rank */ + int mpi_size; /* Total # of processes */ + int mpi_round=0; /* Current process responsible for I/O */ + unsigned chunk_allocated=0; /* Flag to indicate that chunk was actually allocated */ + int carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */ + int i; /* Local index variable */ + unsigned u; /* Local index variable */ herr_t ret_value=SUCCEED; /* Return value */ FUNC_ENTER(H5F_istore_allocate, FAIL); @@ -2438,52 +2443,86 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, assert(layout && H5D_CHUNKED==layout->type); assert(layout->ndims>0 && layout->ndims<=H5O_LAYOUT_NDIMS); assert(H5F_addr_defined(layout->addr)); + assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id)); + assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER)); + assert(dc_plist!=NULL); - /* Get necessary properties from property list */ + /* Get necessary properties from dataset creation property list */ if(H5P_get(dc_plist, H5D_CRT_FILL_VALUE_NAME, &fill) < 0) - 
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get fill value"); + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get fill value"); if(H5P_get(dc_plist, H5D_CRT_DATA_PIPELINE_NAME, &pline) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get data pipeline"); + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get data pipeline"); + + /* Get necessary properties from dataset transfer property list */ + if (TRUE!=H5P_isa_class(dxpl_id,H5P_DATASET_XFER) || NULL == (dx_plist = H5I_object(dxpl_id))) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list"); + if(H5P_get(dx_plist,H5D_XFER_BTREE_SPLIT_RATIO_NAME,split_ratios)<0) + HGOTO_ERROR(H5E_STORAGE, H5E_CANTGET, FAIL, "can't get B-tree split ratios"); + + /* Can't use data I/O pipeline in parallel (yet) */ + if (pline.nfilters>0) + HGOTO_ERROR(H5E_STORAGE, H5E_UNSUPPORTED, FAIL, "can't use data pipeline in parallel"); /* * Setup indice to go through all chunks. (Future improvement * should allocate only chunks that have no file space assigned yet. */ for (u=0, chunk_size=1; u<layout->ndims; u++) { - chunk_offset[u]=0; + chunk_offset[u] = 0; chunk_size *= layout->dim[u]; } /* end for */ - /* Loop over all chunks */ - carry=0; - while (carry==0) { - /* No file space assigned yet. Allocate it. */ - /* The following needs improvement like calling the */ - /* allocation directly rather than indirectly using the */ - /* allocation effect in the unlock process. */ + /* Allocate chunk buffer for processes to use when writing fill values */ + if (NULL==(chunk = H5F_istore_chunk_alloc(chunk_size))) + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for chunk"); + /* Fill the chunk with the proper values */ + if(fill.buf) { + /* + * Replicate the fill value throughout the chunk. 
+ */ + assert(0==chunk_size % fill.size); + H5V_array_fill(chunk, fill.buf, fill.size, chunk_size/fill.size); + } else { /* - * Lock the chunk, copy from application to chunk, then unlock the - * chunk. + * No fill value was specified, assume all zeros. */ + HDmemset (chunk, 0, chunk_size); + } /* end else */ -#ifdef H5_HAVE_PARALLEL - /* rky 981207 Serialize access to this critical region. */ - if (SUCCEED!= H5FD_mpio_wait_for_left_neighbor(f->shared->lf)) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to lock the data chunk"); -#endif - if (NULL==(chunk=H5F_istore_lock(f, dxpl_id, layout, &pline, - &fill, chunk_offset, FALSE, &idx_hint))) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to read raw data chunk"); - - H5_CHECK_OVERFLOW(chunk_size,hsize_t,size_t); - if (H5F_istore_unlock(f, dxpl_id, layout, &pline, TRUE, - chunk_offset, &idx_hint, chunk, (size_t)chunk_size)<0) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "uanble to unlock raw data chunk"); -#ifdef H5_HAVE_PARALLEL - if (SUCCEED!= H5FD_mpio_signal_right_neighbor(f->shared->lf)) - HGOTO_ERROR (H5E_IO, H5E_WRITEERROR, FAIL, "unable to unlock the data chunk"); -#endif + /* Retrieve up MPI parameters */ + if ((mpi_rank=H5FD_mpio_mpi_rank(f->shared->lf))<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI rank"); + if ((mpi_size=H5FD_mpio_mpi_size(f->shared->lf))<0) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI size"); + + /* Loop over all chunks */ + carry=0; + while (carry==0) { + /* Check if the chunk exists yet */ + if(H5F_istore_get_addr(f,layout,chunk_offset)==HADDR_UNDEF) { + /* Initialize the chunk information */ + udata.mesg = *layout; + udata.key.filter_mask = 0; + udata.addr = HADDR_UNDEF; + udata.key.nbytes = chunk_size; + for (u=0; u<layout->ndims; u++) + udata.key.offset[u] = chunk_offset[u]; + + /* Allocate the chunk with all processes */ + if (H5B_insert(f, H5B_ISTORE, layout->addr, split_ratios, &udata)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, 
FAIL, "unable to allocate chunk"); + + /* Round-robin write the chunks out from only one process */ + if(mpi_round==mpi_rank) { + if (H5F_block_write(f, H5FD_MEM_DRAW, udata.addr, udata.key.nbytes, dxpl_id, chunk)<0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file"); + } /* end if */ + mpi_round=(mpi_round+1)%mpi_size; + + /* Indicate that a chunk was allocated */ + chunk_allocated=1; + } /* end if */ /* Increment indices */ for (i=layout->ndims-1, carry=1; i>=0 && carry; --i) { @@ -2496,24 +2535,24 @@ H5F_istore_allocate(H5F_t *f, hid_t dxpl_id, const H5O_layout_t *layout, } /* end for */ } /* end while */ -#ifdef H5_HAVE_PARALLEL - /* - * rky 980923 - * - * The following barrier is a temporary fix to prevent overwriting real - * data caused by a race between one proc's call of H5F_istore_allocate - * (from H5D_init_storage, ultimately from H5Dcreate and H5Dextend) and - * another proc's call of H5Dwrite. Eventually, this barrier should be - * removed, when H5D_init_storage is changed to call H5MF_alloc directly - * to allocate space, instead of calling H5F_istore_unlock. - */ - if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf))) - HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed"); -#endif + /* Only need to block at the barrier if we actually allocated a chunk */ + if(chunk_allocated) { + /* Wait at barrier to avoid race conditions where some processes are + * still writing out chunks and other processes race ahead to read + * them in, getting bogus data.
+ */ + if (MPI_Barrier(H5FD_mpio_communicator(f->shared->lf))) + HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_Barrier failed"); + } /* end if */ done: + /* Free the chunk for fill values */ + if(chunk!=NULL) + H5F_istore_chunk_free(chunk); + FUNC_LEAVE(ret_value); } +#endif /* H5_HAVE_PARALLEL */ /*------------------------------------------------------------------------- @@ -2824,7 +2863,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout, hsize_t idx_max[H5O_LAYOUT_NDIMS]; hsize_t sub_size[H5O_LAYOUT_NDIMS]; hsize_t naccessed; /*bytes accessed in chunk */ - hsize_t elm_size; /*size of an element in bytes */ hsize_t end_chunk; /*chunk position counter */ hssize_t start[H5O_LAYOUT_NDIMS]; /*starting location of hyperslab */ hsize_t count[H5O_LAYOUT_NDIMS]; /*element count of hyperslab */ @@ -2866,7 +2904,6 @@ H5F_istore_initialize_by_extent(H5F_t *f, const H5O_layout_t *layout, for(i = 0; i < rank; i++) size[i] = curr_dims[i]; size[i] = layout->dim[i]; - elm_size = size[i]; /* Default dataset transfer property list */ dxpl_id = H5P_DATASET_XFER_DEFAULT; diff --git a/src/H5Fpkg.h b/src/H5Fpkg.h index 72cd321..30439b4 100644 --- a/src/H5Fpkg.h +++ b/src/H5Fpkg.h @@ -194,10 +194,12 @@ __DLL__ herr_t H5F_istore_write(H5F_t *f, hid_t dxpl_id, const hsize_t size_m[], const hssize_t offset_m[], const hssize_t offset[], const hsize_t size[], const void *buf); +#ifdef H5_HAVE_PARALLEL __DLL__ herr_t H5F_istore_allocate (H5F_t *f, hid_t dxpl_id, const struct H5O_layout_t *layout, const hsize_t *space_dim, struct H5P_genplist_t *dc_plist); +#endif /* H5_HAVE_PARALLEL */ /* Functions that operate on contiguous storage wrt boot block */ __DLL__ herr_t H5F_contig_read(H5F_t *f, hsize_t max_data, H5FD_mem_t type, haddr_t addr, |