diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/H5.c | 13 | ||||
-rw-r--r-- | src/H5Dchunk.c | 779 | ||||
-rw-r--r-- | src/H5Dcompact.c | 7 | ||||
-rw-r--r-- | src/H5Dcontig.c | 111 | ||||
-rw-r--r-- | src/H5Defl.c | 8 | ||||
-rw-r--r-- | src/H5Dio.c | 21 | ||||
-rw-r--r-- | src/H5Dpkg.h | 6 | ||||
-rw-r--r-- | src/H5FD.c | 368 | ||||
-rw-r--r-- | src/H5FDcore.c | 4 | ||||
-rw-r--r-- | src/H5FDdevelop.h | 20 | ||||
-rw-r--r-- | src/H5FDdirect.c | 4 | ||||
-rw-r--r-- | src/H5FDfamily.c | 4 | ||||
-rw-r--r-- | src/H5FDhdfs.c | 4 | ||||
-rw-r--r-- | src/H5FDint.c | 1928 | ||||
-rw-r--r-- | src/H5FDlog.c | 4 | ||||
-rw-r--r-- | src/H5FDmirror.c | 4 | ||||
-rw-r--r-- | src/H5FDmpio.c | 992 | ||||
-rw-r--r-- | src/H5FDmulti.c | 4 | ||||
-rw-r--r-- | src/H5FDprivate.h | 22 | ||||
-rw-r--r-- | src/H5FDros3.c | 4 | ||||
-rw-r--r-- | src/H5FDsec2.c | 4 | ||||
-rw-r--r-- | src/H5FDsplitter.c | 4 | ||||
-rw-r--r-- | src/H5FDstdio.c | 4 | ||||
-rw-r--r-- | src/H5Fio.c | 93 | ||||
-rw-r--r-- | src/H5Fprivate.h | 11 | ||||
-rw-r--r-- | src/H5PB.c | 67 | ||||
-rw-r--r-- | src/H5PBprivate.h | 1 | ||||
-rw-r--r-- | src/H5private.h | 5 |
28 files changed, 4266 insertions, 230 deletions
@@ -83,6 +83,8 @@ hbool_t H5_libinit_g = FALSE; /* Library hasn't been initialized */ hbool_t H5_libterm_g = FALSE; /* Library isn't being shutdown */ #endif +hbool_t H5_use_selection_io_g = FALSE; + #ifdef H5_HAVE_MPE hbool_t H5_MPEinit_g = FALSE; /* MPE Library hasn't been initialized */ #endif @@ -145,7 +147,8 @@ done: herr_t H5_init_library(void) { - herr_t ret_value = SUCCEED; + char * env_use_select_io = NULL; + herr_t ret_value = SUCCEED; /* Set the 'library initialized' flag as early as possible, to avoid * possible re-entrancy. @@ -276,6 +279,14 @@ H5_init_library(void) if (H5VL_init_phase2() < 0) HGOTO_ERROR(H5E_FUNC, H5E_CANTINIT, FAIL, "unable to initialize vol interface") + /* Check for HDF5_USE_SELECTION_IO env variable */ + env_use_select_io = HDgetenv("HDF5_USE_SELECTION_IO"); + if (NULL != env_use_select_io && HDstrcmp(env_use_select_io, "") && HDstrcmp(env_use_select_io, "0") && + HDstrcmp(env_use_select_io, "no") && HDstrcmp(env_use_select_io, "No") && + HDstrcmp(env_use_select_io, "NO") && HDstrcmp(env_use_select_io, "false") && + HDstrcmp(env_use_select_io, "False") && HDstrcmp(env_use_select_io, "FALSE")) + H5_use_selection_io_g = TRUE; + /* Debugging? */ H5__debug_mask("-all"); H5__debug_mask(HDgetenv("HDF5_DEBUG")); diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c index 6600417..0cc1681 100644 --- a/src/H5Dchunk.c +++ b/src/H5Dchunk.c @@ -59,6 +59,7 @@ #include "H5Iprivate.h" /* IDs */ #include "H5MMprivate.h" /* Memory management */ #include "H5MFprivate.h" /* File memory management */ +#include "H5PBprivate.h" /* Page Buffer */ #include "H5VMprivate.h" /* Vector and array functions */ /****************/ @@ -70,6 +71,7 @@ #define H5D_CHUNK_GET_NODE_INFO(map, node) \ (map->use_single ? map->single_chunk_info : (H5D_chunk_info_t *)H5SL_item(node)) #define H5D_CHUNK_GET_NEXT_NODE(map, node) (map->use_single ? (H5SL_node_t *)NULL : H5SL_next(node)) +#define H5D_CHUNK_GET_NODE_COUNT(map) (map->use_single ? (size_t)1 : H5SL_count(map->sel_chunks)) /* Sanity check on chunk index types: commonly used by a lot of routines in this file */ #define H5D_CHUNK_STORAGE_INDEX_CHK(storage) \ @@ -257,9 +259,8 @@ typedef struct H5D_chunk_iter_ud_t { /* Chunked layout operation callbacks */ static herr_t H5D__chunk_construct(H5F_t *f, H5D_t *dset); static herr_t H5D__chunk_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id); -static herr_t H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, - H5D_chunk_map_t *fm); +static herr_t H5D__chunk_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, + const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *fm); static herr_t H5D__chunk_io_init_selections(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm); static herr_t H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, @@ -304,6 +305,7 @@ static herr_t H5D__chunk_file_cb(void *elem, const H5T_t *type, unsigned ndims void *fm); static herr_t H5D__chunk_mem_cb(void *elem, const H5T_t *type, unsigned ndims, const hsize_t *coords, void *fm); +static htri_t H5D__chunk_may_use_select_io(const H5D_io_info_t *io_info); static unsigned H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled); static herr_t H5D__chunk_flush_entry(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t reset); static herr_t H5D__chunk_cache_evict(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t flush); @@ -1056,16 +1058,17 @@ H5D__chunk_is_data_cached(const H5D_shared_t *shared_dset) *------------------------------------------------------------------------- */ static herr_t -H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, +H5D__chunk_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *fm) { const H5D_t *dataset = io_info->dset; /* Local pointer to dataset info */ hssize_t old_offset[H5O_LAYOUT_NDIMS]; /* Old selection offset */ htri_t file_space_normalized = FALSE; /* File dataspace was normalized */ unsigned f_ndims; /* The number of dimensions of the file's dataspace */ - int sm_ndims; /* The number of dimensions of the memory buffer's dataspace (signed) */ - unsigned u; /* Local index variable */ - herr_t ret_value = SUCCEED; /* Return value */ + int sm_ndims; /* The number of dimensions of the memory buffer's dataspace (signed) */ + htri_t use_selection_io = FALSE; /* Whether to use selection I/O */ + unsigned u; /* Local index variable */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC @@ -1119,6 +1122,11 @@ H5D__chunk_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_inf if (H5D__chunk_io_init_selections(io_info, type_info, fm) < 0) HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create file and memory chunk selections") + /* Check if we're performing selection I/O and save the result */ + if ((use_selection_io = H5D__chunk_may_use_select_io(io_info)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible") + io_info->use_select_io = (hbool_t)use_selection_io; + done: /* Reset the global dataspace info */ fm->file_space = NULL; @@ -2438,6 +2446,76 @@ done: } /* end H5D__chunk_cacheable() */ /*------------------------------------------------------------------------- + * Function: H5D__chunk_may_use_select_io + * + * Purpose: A small internal function to if it may be possible to use + * selection I/O. + * + * Return: TRUE or FALSE + * + * Programmer: Neil Fortner + * 4 May 2021 + * + *------------------------------------------------------------------------- + */ +static htri_t +H5D__chunk_may_use_select_io(const H5D_io_info_t *io_info) +{ + const H5D_t *dataset = io_info->dset; /* Local pointer to dataset info */ + htri_t ret_value = FAIL; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity check */ + HDassert(io_info); + HDassert(dataset); + + /* Don't use selection I/O if it's globally disabled, there is a type + * conversion, or if there are filters on the dataset (for now) */ + if (!H5_use_selection_io_g || io_info->io_ops.single_read != H5D__select_read || + dataset->shared->dcpl_cache.pline.nused > 0) + ret_value = FALSE; + else { + htri_t page_buf_enabled; + + HDassert(io_info->io_ops.single_write == H5D__select_write); + + /* Check if the page buffer is enabled */ + if ((page_buf_enabled = H5PB_enabled(io_info->f_sh, H5FD_MEM_DRAW)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if page buffer is enabled") + if (page_buf_enabled) + ret_value = FALSE; + else { + /* Check if chunks in this dataset may be cached, if so don't use + * selection I/O (for now). Note that chunks temporarily cached for + * the purpose of writing the fill value don't count, since they are + * immediately evicted. */ +#ifdef H5_HAVE_PARALLEL + /* If MPI based VFD is used and the file is opened for write access, + * must bypass the chunk-cache scheme because other MPI processes + * could be writing to other elements in the same chunk. + */ + if (io_info->using_mpi_vfd && (H5F_ACC_RDWR & H5F_INTENT(dataset->oloc.file))) + ret_value = TRUE; + else { +#endif /* H5_HAVE_PARALLEL */ + /* Check if the chunk is too large to keep in the cache */ + H5_CHECK_OVERFLOW(dataset->shared->layout.u.chunk.size, uint32_t, size_t); + if ((size_t)dataset->shared->layout.u.chunk.size > dataset->shared->cache.chunk.nbytes_max) + ret_value = TRUE; + else + ret_value = FALSE; +#ifdef H5_HAVE_PARALLEL + } /* end else */ +#endif /* H5_HAVE_PARALLEL */ + } /* end else */ + } /* end else */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__chunk_may_use_select_io() */ + +/*------------------------------------------------------------------------- * Function: H5D__chunk_read * * Purpose: Read from a chunked dataset. @@ -2454,16 +2532,17 @@ H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_ const H5S_t H5_ATTR_UNUSED *file_space, const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t *fm) { - H5SL_node_t * chunk_node; /* Current node in chunk skip list */ - H5D_io_info_t nonexistent_io_info; /* "nonexistent" I/O info object */ - H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ - H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ - H5D_io_info_t cpt_io_info; /* Compact I/O info object */ - H5D_storage_t cpt_store; /* Chunk storage information as compact dataset */ - hbool_t cpt_dirty; /* Temporary placeholder for compact storage "dirty" flag */ - uint32_t src_accessed_bytes = 0; /* Total accessed size in a chunk */ - hbool_t skip_missing_chunks = FALSE; /* Whether to skip missing chunks */ - herr_t ret_value = SUCCEED; /*return value */ + H5SL_node_t * chunk_node; /* Current node in chunk skip list */ + H5D_io_info_t nonexistent_io_info; /* "nonexistent" I/O info object */ + uint32_t src_accessed_bytes = 0; /* Total accessed size in a chunk */ + hbool_t skip_missing_chunks = FALSE; /* Whether to skip missing chunks */ + H5S_t ** chunk_mem_spaces = NULL; /* Array of chunk memory spaces */ + H5S_t * chunk_mem_spaces_static[8]; /* Static buffer for chunk_mem_spaces */ + H5S_t ** chunk_file_spaces = NULL; /* Array of chunk file spaces */ + H5S_t * chunk_file_spaces_static[8]; /* Static buffer for chunk_file_spaces */ + haddr_t * chunk_addrs = NULL; /* Array of chunk addresses */ + haddr_t chunk_addrs_static[8]; /* Static buffer for chunk_addrs */ + herr_t ret_value = SUCCEED; /*return value */ FUNC_ENTER_STATIC @@ -2477,23 +2556,6 @@ H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_ H5MM_memcpy(&nonexistent_io_info, io_info, sizeof(nonexistent_io_info)); nonexistent_io_info.layout_ops = *H5D_LOPS_NONEXISTENT; - /* Set up contiguous I/O info object */ - H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); - ctg_io_info.store = &ctg_store; - ctg_io_info.layout_ops = *H5D_LOPS_CONTIG; - - /* Initialize temporary contiguous storage info */ - H5_CHECKED_ASSIGN(ctg_store.contig.dset_size, hsize_t, io_info->dset->shared->layout.u.chunk.size, - uint32_t); - - /* Set up compact I/O info object */ - H5MM_memcpy(&cpt_io_info, io_info, sizeof(cpt_io_info)); - cpt_io_info.store = &cpt_store; - cpt_io_info.layout_ops = *H5D_LOPS_COMPACT; - - /* Initialize temporary compact storage info */ - cpt_store.compact.dirty = &cpt_dirty; - { const H5O_fill_t *fill = &(io_info->dset->shared->dcpl_cache.fill); /* Fill value info */ H5D_fill_value_t fill_status; /* Fill value status */ @@ -2511,80 +2573,217 @@ H5D__chunk_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_ skip_missing_chunks = TRUE; } - /* Iterate through nodes in chunk skip list */ - chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); - while (chunk_node) { - H5D_chunk_info_t *chunk_info; /* Chunk information */ - H5D_chunk_ud_t udata; /* Chunk index pass-through */ + /* Different blocks depending on whether we're using selection I/O */ + if (io_info->use_select_io) { + size_t num_chunks; + size_t element_sizes[2] = {type_info->dst_type_size, 0}; + void * bufs[2] = {io_info->u.rbuf, NULL}; + + /* Cache number of chunks */ + num_chunks = H5D_CHUNK_GET_NODE_COUNT(fm); + + /* Allocate arrays of dataspaces and offsets for use with selection I/O, + * or point to static buffers */ + HDassert(sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]) == + sizeof(chunk_file_spaces_static) / sizeof(chunk_file_spaces_static[0])); + HDassert(sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]) == + sizeof(chunk_addrs_static) / sizeof(chunk_addrs_static[0])); + if (num_chunks > (sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]))) { + if (NULL == (chunk_mem_spaces = H5MM_malloc(num_chunks * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for memory space list") + if (NULL == (chunk_file_spaces = H5MM_malloc(num_chunks * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for file space list") + if (NULL == (chunk_addrs = H5MM_malloc(num_chunks * sizeof(haddr_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for chunk address list") + } /* end if */ + else { + chunk_mem_spaces = chunk_mem_spaces_static; + chunk_file_spaces = chunk_file_spaces_static; + chunk_addrs = chunk_addrs_static; + } /* end else */ - /* Get the actual chunk information from the skip list node */ - chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); + /* Reset num_chunks */ + num_chunks = 0; - /* Get the info for the chunk in the file */ - if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + /* Iterate through nodes in chunk skip list */ + chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); + while (chunk_node) { + H5D_chunk_info_t *chunk_info; /* Chunk information */ + H5D_chunk_ud_t udata; /* Chunk index pass-through */ - /* Sanity check */ - HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || - (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); + /* Get the actual chunk information from the skip list node */ + chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); - /* Check for non-existant chunk & skip it if appropriate */ - if (H5F_addr_defined(udata.chunk_block.offset) || UINT_MAX != udata.idx_hint || - !skip_missing_chunks) { - H5D_io_info_t *chk_io_info; /* Pointer to I/O info object for this chunk */ - void * chunk = NULL; /* Pointer to locked chunk buffer */ - htri_t cacheable; /* Whether the chunk is cacheable */ + /* Get the info for the chunk in the file */ + if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") - /* Set chunk's [scaled] coordinates */ - io_info->store->chunk.scaled = chunk_info->scaled; + /* There should be no chunks cached */ + HDassert(UINT_MAX == udata.idx_hint); - /* Determine if we should use the chunk cache */ - if ((cacheable = H5D__chunk_cacheable(io_info, udata.chunk_block.offset, FALSE)) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable") - if (cacheable) { - /* Load the chunk into cache and lock it. */ + /* Sanity check */ + HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || + (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); + + /* Check for non-existant chunk & skip it if appropriate */ + if (H5F_addr_defined(udata.chunk_block.offset)) { + /* Add chunk to list for selection I/O */ + chunk_mem_spaces[num_chunks] = chunk_info->mspace; + chunk_file_spaces[num_chunks] = chunk_info->fspace; + chunk_addrs[num_chunks] = udata.chunk_block.offset; + num_chunks++; + } /* end if */ + else if (!skip_missing_chunks) { + /* Perform the actual read operation from the nonexistent chunk + */ + if ((io_info->io_ops.single_read)(&nonexistent_io_info, type_info, + (hsize_t)chunk_info->chunk_points, chunk_info->fspace, + chunk_info->mspace) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked read failed") + } /* end if */ - /* Compute # of bytes accessed in chunk */ - H5_CHECK_OVERFLOW(type_info->src_type_size, /*From:*/ size_t, /*To:*/ uint32_t); - src_accessed_bytes = chunk_info->chunk_points * (uint32_t)type_info->src_type_size; + /* Advance to next chunk in list */ + chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); + } /* end while */ - /* Lock the chunk into the cache */ - if (NULL == (chunk = H5D__chunk_lock(io_info, &udata, FALSE, FALSE))) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + /* Issue selection I/O call (we can skip the page buffer because we've + * already verified it won't be used, and the metadata accumulator + * because this is raw data) */ + if (H5F_shared_select_read(H5F_SHARED(io_info->dset->oloc.file), H5FD_MEM_DRAW, (uint32_t)num_chunks, + (const H5S_t *const *)chunk_mem_spaces, + (const H5S_t *const *)chunk_file_spaces, chunk_addrs, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunk selection read failed") + + /* Clean up memory */ + if (chunk_mem_spaces != chunk_mem_spaces_static) { + HDassert(chunk_mem_spaces); + HDassert(chunk_file_spaces != chunk_file_spaces_static); + HDassert(chunk_addrs != chunk_addrs_static); + H5MM_free(chunk_mem_spaces); + chunk_mem_spaces = NULL; + H5MM_free(chunk_file_spaces); + chunk_file_spaces = NULL; + H5MM_free(chunk_addrs); + chunk_addrs = NULL; + } /* end if */ + } /* end if */ + else { + H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ + H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ + H5D_io_info_t cpt_io_info; /* Compact I/O info object */ + H5D_storage_t cpt_store; /* Chunk storage information as compact dataset */ + hbool_t cpt_dirty; /* Temporary placeholder for compact storage "dirty" flag */ + + /* Set up contiguous I/O info object */ + H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info)); + ctg_io_info.store = &ctg_store; + ctg_io_info.layout_ops = *H5D_LOPS_CONTIG; + + /* Initialize temporary contiguous storage info */ + H5_CHECKED_ASSIGN(ctg_store.contig.dset_size, hsize_t, io_info->dset->shared->layout.u.chunk.size, + uint32_t); + + /* Set up compact I/O info object */ + H5MM_memcpy(&cpt_io_info, io_info, sizeof(cpt_io_info)); + cpt_io_info.store = &cpt_store; + cpt_io_info.layout_ops = *H5D_LOPS_COMPACT; + + /* Initialize temporary compact storage info */ + cpt_store.compact.dirty = &cpt_dirty; + + /* Iterate through nodes in chunk skip list */ + chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); + while (chunk_node) { + H5D_chunk_info_t *chunk_info; /* Chunk information */ + H5D_chunk_ud_t udata; /* Chunk index pass-through */ + htri_t cacheable; /* Whether the chunk is cacheable */ + + /* Get the actual chunk information from the skip list node */ + chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); + + /* Get the info for the chunk in the file */ + if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") - /* Set up the storage buffer information for this chunk */ - cpt_store.compact.buf = chunk; + /* Sanity check */ + HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || + (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); - /* Point I/O info at contiguous I/O info for this chunk */ - chk_io_info = &cpt_io_info; - } /* end if */ - else if (H5F_addr_defined(udata.chunk_block.offset)) { - /* Set up the storage address information for this chunk */ - ctg_store.contig.dset_addr = udata.chunk_block.offset; + /* Check for non-existant chunk & skip it if appropriate */ + if (H5F_addr_defined(udata.chunk_block.offset) || UINT_MAX != udata.idx_hint || + !skip_missing_chunks) { + H5D_io_info_t *chk_io_info; /* Pointer to I/O info object for this chunk */ + void * chunk = NULL; /* Pointer to locked chunk buffer */ - /* Point I/O info at temporary I/O info for this chunk */ - chk_io_info = &ctg_io_info; - } /* end else if */ - else { - /* Point I/O info at "nonexistent" I/O info for this chunk */ - chk_io_info = &nonexistent_io_info; - } /* end else */ + /* Set chunk's [scaled] coordinates */ + io_info->store->chunk.scaled = chunk_info->scaled; - /* Perform the actual read operation */ - if ((io_info->io_ops.single_read)(chk_io_info, type_info, (hsize_t)chunk_info->chunk_points, - chunk_info->fspace, chunk_info->mspace) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked read failed") + /* Determine if we should use the chunk cache */ + if ((cacheable = H5D__chunk_cacheable(io_info, udata.chunk_block.offset, FALSE)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable") + if (cacheable) { + /* Load the chunk into cache and lock it. */ - /* Release the cache lock on the chunk. */ - if (chunk && H5D__chunk_unlock(io_info, &udata, FALSE, chunk, src_accessed_bytes) < 0) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") - } /* end if */ + /* Compute # of bytes accessed in chunk */ + H5_CHECK_OVERFLOW(type_info->src_type_size, /*From:*/ size_t, /*To:*/ uint32_t); + src_accessed_bytes = chunk_info->chunk_points * (uint32_t)type_info->src_type_size; - /* Advance to next chunk in list */ - chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); - } /* end while */ + /* Lock the chunk into the cache */ + if (NULL == (chunk = H5D__chunk_lock(io_info, &udata, FALSE, FALSE))) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + + /* Set up the storage buffer information for this chunk */ + cpt_store.compact.buf = chunk; + + /* Point I/O info at contiguous I/O info for this chunk */ + chk_io_info = &cpt_io_info; + } /* end if */ + else if (H5F_addr_defined(udata.chunk_block.offset)) { + /* Set up the storage address information for this chunk */ + ctg_store.contig.dset_addr = udata.chunk_block.offset; + + /* Point I/O info at temporary I/O info for this chunk */ + chk_io_info = &ctg_io_info; + } /* end else if */ + else { + /* Point I/O info at "nonexistent" I/O info for this chunk */ + chk_io_info = &nonexistent_io_info; + } /* end else */ + + /* Perform the actual read operation */ + if ((io_info->io_ops.single_read)(chk_io_info, type_info, (hsize_t)chunk_info->chunk_points, + chunk_info->fspace, chunk_info->mspace) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked read failed") + + /* Release the cache lock on the chunk. */ + if (chunk && H5D__chunk_unlock(io_info, &udata, FALSE, chunk, src_accessed_bytes) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") + } /* end if */ + + /* Advance to next chunk in list */ + chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); + } /* end while */ + } /* end else */ done: + /* Cleanup on failure */ + if (ret_value < 0) { + if (chunk_mem_spaces != chunk_mem_spaces_static) + chunk_mem_spaces = H5MM_xfree(chunk_mem_spaces); + if (chunk_file_spaces != chunk_file_spaces_static) + chunk_file_spaces = H5MM_xfree(chunk_file_spaces); + if (chunk_addrs != chunk_addrs_static) + chunk_addrs = H5MM_xfree(chunk_addrs); + } /* end if */ + + /* Make sure we cleaned up */ + HDassert(!chunk_mem_spaces || chunk_mem_spaces == chunk_mem_spaces_static); + HDassert(!chunk_file_spaces || chunk_file_spaces == chunk_file_spaces_static); + HDassert(!chunk_addrs || chunk_addrs == chunk_addrs_static); + FUNC_LEAVE_NOAPI(ret_value) } /* H5D__chunk_read() */ @@ -2605,14 +2804,20 @@ H5D__chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize const H5S_t H5_ATTR_UNUSED *file_space, const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t *fm) { - H5SL_node_t * chunk_node; /* Current node in chunk skip list */ - H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ - H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ - H5D_io_info_t cpt_io_info; /* Compact I/O info object */ - H5D_storage_t cpt_store; /* Chunk storage information as compact dataset */ - hbool_t cpt_dirty; /* Temporary placeholder for compact storage "dirty" flag */ - uint32_t dst_accessed_bytes = 0; /* Total accessed size in a chunk */ - herr_t ret_value = SUCCEED; /* Return value */ + H5SL_node_t * chunk_node; /* Current node in chunk skip list */ + H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */ + H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */ + H5D_io_info_t cpt_io_info; /* Compact I/O info object */ + H5D_storage_t cpt_store; /* Chunk storage information as compact dataset */ + hbool_t cpt_dirty; /* Temporary placeholder for compact storage "dirty" flag */ + uint32_t dst_accessed_bytes = 0; /* Total accessed size in a chunk */ + H5S_t ** chunk_mem_spaces = NULL; /* Array of chunk memory spaces */ + H5S_t * chunk_mem_spaces_static[8]; /* Static buffer for chunk_mem_spaces */ + H5S_t ** chunk_file_spaces = NULL; /* Array of chunk file spaces */ + H5S_t * chunk_file_spaces_static[8]; /* Static buffer for chunk_file_spaces */ + haddr_t * chunk_addrs = NULL; /* Array of chunk addresses */ + haddr_t chunk_addrs_static[8]; /* Static buffer for chunk_addrs */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_STATIC @@ -2639,116 +2844,296 @@ H5D__chunk_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize /* Initialize temporary compact storage info */ cpt_store.compact.dirty = &cpt_dirty; - /* Iterate through nodes in chunk skip list */ - chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); - while (chunk_node) { - H5D_chunk_info_t * chunk_info; /* Chunk information */ - H5D_chk_idx_info_t idx_info; /* Chunked index info */ - H5D_io_info_t * chk_io_info; /* Pointer to I/O info object for this chunk */ - void * chunk; /* Pointer to locked chunk buffer */ - H5D_chunk_ud_t udata; /* Index pass-through */ - htri_t cacheable; /* Whether the chunk is cacheable */ - hbool_t need_insert = FALSE; /* Whether the chunk needs to be inserted into the index */ + /* Different blocks depending on whether we're using selection I/O */ + if (io_info->use_select_io) { + size_t num_chunks; + size_t element_sizes[2] = {type_info->dst_type_size, 0}; + const void *bufs[2] = {io_info->u.wbuf, NULL}; + + /* Cache number of chunks */ + num_chunks = H5D_CHUNK_GET_NODE_COUNT(fm); + + /* Allocate arrays of dataspaces and offsets for use with selection I/O, + * or point to static buffers */ + HDassert(sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]) == + sizeof(chunk_file_spaces_static) / sizeof(chunk_file_spaces_static[0])); + HDassert(sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]) == + sizeof(chunk_addrs_static) / sizeof(chunk_addrs_static[0])); + if (num_chunks > (sizeof(chunk_mem_spaces_static) / sizeof(chunk_mem_spaces_static[0]))) { + if (NULL == (chunk_mem_spaces = H5MM_malloc(num_chunks * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for memory space list") + if (NULL == (chunk_file_spaces = H5MM_malloc(num_chunks * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for file space list") + if (NULL == (chunk_addrs = H5MM_malloc(num_chunks * sizeof(haddr_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for chunk address list") + } /* end if */ + else { + chunk_mem_spaces = chunk_mem_spaces_static; + chunk_file_spaces = chunk_file_spaces_static; + chunk_addrs = chunk_addrs_static; + } /* end else */ - /* Get the actual chunk information from the skip list node */ - chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); + /* Reset num_chunks */ + num_chunks = 0; - /* Look up the chunk */ - if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + /* Iterate through nodes in chunk skip list */ + chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); + while (chunk_node) { + H5D_chunk_info_t * chunk_info; /* Chunk information */ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + H5D_chunk_ud_t udata; /* Index pass-through */ + htri_t cacheable; /* Whether the chunk is cacheable */ + hbool_t need_insert = FALSE; /* Whether the chunk needs to be inserted into the index */ - /* Sanity check */ - HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || - (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); - - /* Set chunk's [scaled] coordinates */ - io_info->store->chunk.scaled = chunk_info->scaled; - - /* Determine if we should use the chunk cache */ - if ((cacheable = H5D__chunk_cacheable(io_info, udata.chunk_block.offset, TRUE)) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable") - if (cacheable) { - /* Load the chunk into cache. But if the whole chunk is written, - * simply allocate space instead of load the chunk. */ - hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */ - - /* Compute # of bytes accessed in chunk */ - H5_CHECK_OVERFLOW(type_info->dst_type_size, /*From:*/ size_t, /*To:*/ uint32_t); - dst_accessed_bytes = chunk_info->chunk_points * (uint32_t)type_info->dst_type_size; - - /* Determine if we will access all the data in the chunk */ - if (dst_accessed_bytes != ctg_store.contig.dset_size || - (chunk_info->chunk_points * type_info->src_type_size) != ctg_store.contig.dset_size || - fm->fsel_type == H5S_SEL_POINTS) - entire_chunk = FALSE; - - /* Lock the chunk into the cache */ - if (NULL == (chunk = H5D__chunk_lock(io_info, &udata, entire_chunk, FALSE))) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") - - /* Set up the storage buffer information for this chunk */ - cpt_store.compact.buf = chunk; - - /* Point I/O info at main I/O info for this chunk */ - chk_io_info = &cpt_io_info; + /* Get the actual chunk information from the skip list node */ + chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); + + /* Get the info for the chunk in the file */ + if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + + /* There should be no chunks cached */ + HDassert(UINT_MAX == udata.idx_hint); + + /* Sanity check */ + HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || + (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); + + /* Set chunk's [scaled] coordinates */ + io_info->store->chunk.scaled = chunk_info->scaled; + + /* Determine if we should use the chunk cache */ + if ((cacheable = H5D__chunk_cacheable(io_info, udata.chunk_block.offset, TRUE)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable") + if (cacheable) { + /* Load the chunk into cache. But if the whole chunk is written, + * simply allocate space instead of load the chunk. */ + void * chunk; /* Pointer to locked chunk buffer */ + hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */ + + /* Compute # of bytes accessed in chunk */ + H5_CHECK_OVERFLOW(type_info->dst_type_size, /*From:*/ size_t, /*To:*/ uint32_t); + dst_accessed_bytes = chunk_info->chunk_points * (uint32_t)type_info->dst_type_size; + + /* Determine if we will access all the data in the chunk */ + if (dst_accessed_bytes != ctg_store.contig.dset_size || + (chunk_info->chunk_points * type_info->src_type_size) != ctg_store.contig.dset_size || + fm->fsel_type == H5S_SEL_POINTS) + entire_chunk = FALSE; + + /* Lock the chunk into the cache */ + if (NULL == (chunk = H5D__chunk_lock(io_info, &udata, entire_chunk, FALSE))) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + + /* Set up the storage buffer information for this chunk */ + cpt_store.compact.buf = chunk; + + /* Perform the actual write operation */ + if ((io_info->io_ops.single_write)(&cpt_io_info, type_info, (hsize_t)chunk_info->chunk_points, + chunk_info->fspace, chunk_info->mspace) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked write failed") + + /* Release the cache lock on the chunk */ + if (H5D__chunk_unlock(io_info, &udata, TRUE, chunk, dst_accessed_bytes) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") + } /* end if */ + else { + /* If the chunk hasn't been allocated on disk, do so now. */ + if (!H5F_addr_defined(udata.chunk_block.offset)) { + /* Compose chunked index info struct */ + idx_info.f = io_info->dset->oloc.file; + idx_info.pline = &(io_info->dset->shared->dcpl_cache.pline); + idx_info.layout = &(io_info->dset->shared->layout.u.chunk); + idx_info.storage = &(io_info->dset->shared->layout.storage.u.chunk); + + /* Set up the size of chunk for user data */ + udata.chunk_block.length = io_info->dset->shared->layout.u.chunk.size; + + /* Allocate the chunk */ + if (H5D__chunk_file_alloc(&idx_info, NULL, &udata.chunk_block, &need_insert, + chunk_info->scaled) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, + "unable to insert/resize chunk on chunk level") + + /* Make sure the address of the chunk is returned. */ + if (!H5F_addr_defined(udata.chunk_block.offset)) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "chunk address isn't defined") + + /* Cache the new chunk information */ + H5D__chunk_cinfo_cache_update(&io_info->dset->shared->cache.chunk.last, &udata); + + /* Insert chunk into index */ + if (need_insert && io_info->dset->shared->layout.storage.u.chunk.ops->insert) + if ((io_info->dset->shared->layout.storage.u.chunk.ops->insert)(&idx_info, &udata, + NULL) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, + "unable to insert chunk addr into index") + } /* end if */ + + /* Add chunk to list for selection I/O */ + chunk_mem_spaces[num_chunks] = chunk_info->mspace; + chunk_file_spaces[num_chunks] = chunk_info->fspace; + chunk_addrs[num_chunks] = udata.chunk_block.offset; + num_chunks++; + } /* end else */ + + /* Advance to next chunk in list */ + chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); + } /* end while */ + + /* Issue selection I/O call (we can skip the page buffer because we've + * already verified it won't be used, and the metadata accumulator + * because this is raw data) */ + if (H5F_shared_select_write(H5F_SHARED(io_info->dset->oloc.file), H5FD_MEM_DRAW, (uint32_t)num_chunks, + (const H5S_t *const *)chunk_mem_spaces, + (const H5S_t *const *)chunk_file_spaces, chunk_addrs, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunk selection read failed") + + /* Clean up memory */ + if (chunk_mem_spaces != chunk_mem_spaces_static) { + HDassert(chunk_mem_spaces); + HDassert(chunk_file_spaces != chunk_file_spaces_static); + HDassert(chunk_addrs != chunk_addrs_static); + H5MM_free(chunk_mem_spaces); + chunk_mem_spaces = NULL; + H5MM_free(chunk_file_spaces); + chunk_file_spaces = NULL; + H5MM_free(chunk_addrs); + chunk_addrs = NULL; } /* end if */ - else { - /* If the chunk hasn't been allocated on disk, do so now. */ - if (!H5F_addr_defined(udata.chunk_block.offset)) { - /* Compose chunked index info struct */ - idx_info.f = io_info->dset->oloc.file; - idx_info.pline = &(io_info->dset->shared->dcpl_cache.pline); - idx_info.layout = &(io_info->dset->shared->layout.u.chunk); - idx_info.storage = &(io_info->dset->shared->layout.storage.u.chunk); - - /* Set up the size of chunk for user data */ - udata.chunk_block.length = io_info->dset->shared->layout.u.chunk.size; - - /* Allocate the chunk */ - if (H5D__chunk_file_alloc(&idx_info, NULL, &udata.chunk_block, &need_insert, - chunk_info->scaled) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, - "unable to insert/resize chunk on chunk level") - - /* Make sure the address of the chunk is returned. */ - if (!H5F_addr_defined(udata.chunk_block.offset)) - HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "chunk address isn't defined") - - /* Cache the new chunk information */ - H5D__chunk_cinfo_cache_update(&io_info->dset->shared->cache.chunk.last, &udata); + } /* end if */ + else { + /* Iterate through nodes in chunk skip list */ + chunk_node = H5D_CHUNK_GET_FIRST_NODE(fm); + while (chunk_node) { + H5D_chunk_info_t * chunk_info; /* Chunk information */ + H5D_chk_idx_info_t idx_info; /* Chunked index info */ + H5D_io_info_t * chk_io_info; /* Pointer to I/O info object for this chunk */ + void * chunk; /* Pointer to locked chunk buffer */ + H5D_chunk_ud_t udata; /* Index pass-through */ + htri_t cacheable; /* Whether the chunk is cacheable */ + hbool_t need_insert = FALSE; /* Whether the chunk needs to be inserted into the index */ + + /* Get the actual chunk information from the skip list node */ + chunk_info = H5D_CHUNK_GET_NODE_INFO(fm, chunk_node); + + /* Look up the chunk */ + if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address") + + /* Sanity check */ + HDassert((H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length > 0) || + (!H5F_addr_defined(udata.chunk_block.offset) && udata.chunk_block.length == 0)); + + /* Set chunk's [scaled] coordinates */ + io_info->store->chunk.scaled = chunk_info->scaled; + + /* Determine if we should use the chunk cache */ + if ((cacheable = H5D__chunk_cacheable(io_info, udata.chunk_block.offset, TRUE)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't tell if chunk is cacheable") + if (cacheable) { + /* Load the chunk into cache. But if the whole chunk is written, + * simply allocate space instead of load the chunk. */ + hbool_t entire_chunk = TRUE; /* Whether whole chunk is selected */ + + /* Compute # of bytes accessed in chunk */ + H5_CHECK_OVERFLOW(type_info->dst_type_size, /*From:*/ size_t, /*To:*/ uint32_t); + dst_accessed_bytes = chunk_info->chunk_points * (uint32_t)type_info->dst_type_size; + + /* Determine if we will access all the data in the chunk */ + if (dst_accessed_bytes != ctg_store.contig.dset_size || + (chunk_info->chunk_points * type_info->src_type_size) != ctg_store.contig.dset_size || + fm->fsel_type == H5S_SEL_POINTS) + entire_chunk = FALSE; + + /* Lock the chunk into the cache */ + if (NULL == (chunk = H5D__chunk_lock(io_info, &udata, entire_chunk, FALSE))) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to read raw data chunk") + + /* Set up the storage buffer information for this chunk */ + cpt_store.compact.buf = chunk; + + /* Point I/O info at main I/O info for this chunk */ + chk_io_info = &cpt_io_info; } /* end if */ + else { + /* If the chunk hasn't been allocated on disk, do so now. */ + if (!H5F_addr_defined(udata.chunk_block.offset)) { + /* Compose chunked index info struct */ + idx_info.f = io_info->dset->oloc.file; + idx_info.pline = &(io_info->dset->shared->dcpl_cache.pline); + idx_info.layout = &(io_info->dset->shared->layout.u.chunk); + idx_info.storage = &(io_info->dset->shared->layout.storage.u.chunk); + + /* Set up the size of chunk for user data */ + udata.chunk_block.length = io_info->dset->shared->layout.u.chunk.size; + + /* Allocate the chunk */ + if (H5D__chunk_file_alloc(&idx_info, NULL, &udata.chunk_block, &need_insert, + chunk_info->scaled) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, + "unable to insert/resize chunk on chunk level") + + /* Make sure the address of the chunk is returned. */ + if (!H5F_addr_defined(udata.chunk_block.offset)) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "chunk address isn't defined") + + /* Cache the new chunk information */ + H5D__chunk_cinfo_cache_update(&io_info->dset->shared->cache.chunk.last, &udata); + } /* end if */ - /* Set up the storage address information for this chunk */ - ctg_store.contig.dset_addr = udata.chunk_block.offset; + /* Set up the storage address information for this chunk */ + ctg_store.contig.dset_addr = udata.chunk_block.offset; - /* No chunk cached */ - chunk = NULL; + /* No chunk cached */ + chunk = NULL; - /* Point I/O info at temporary I/O info for this chunk */ - chk_io_info = &ctg_io_info; - } /* end else */ + /* Point I/O info at temporary I/O info for this chunk */ + chk_io_info = &ctg_io_info; + } /* end else */ - /* Perform the actual write operation */ - if ((io_info->io_ops.single_write)(chk_io_info, type_info, (hsize_t)chunk_info->chunk_points, - chunk_info->fspace, chunk_info->mspace) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked write failed") + /* Perform the actual write operation */ + if ((io_info->io_ops.single_write)(chk_io_info, type_info, (hsize_t)chunk_info->chunk_points, + chunk_info->fspace, chunk_info->mspace) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "chunked write failed") - /* Release the cache lock on the chunk, or insert chunk into index. */ - if (chunk) { - if (H5D__chunk_unlock(io_info, &udata, TRUE, chunk, dst_accessed_bytes) < 0) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") - } /* end if */ - else { - if (need_insert && io_info->dset->shared->layout.storage.u.chunk.ops->insert) - if ((io_info->dset->shared->layout.storage.u.chunk.ops->insert)(&idx_info, &udata, NULL) < 0) - HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk addr into index") - } /* end else */ + /* Release the cache lock on the chunk, or insert chunk into index. */ + if (chunk) { + if (H5D__chunk_unlock(io_info, &udata, TRUE, chunk, dst_accessed_bytes) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "unable to unlock raw data chunk") + } /* end if */ + else { + if (need_insert && io_info->dset->shared->layout.storage.u.chunk.ops->insert) + if ((io_info->dset->shared->layout.storage.u.chunk.ops->insert)(&idx_info, &udata, NULL) < + 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, + "unable to insert chunk addr into index") + } /* end else */ - /* Advance to next chunk in list */ - chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); - } /* end while */ + /* Advance to next chunk in list */ + chunk_node = H5D_CHUNK_GET_NEXT_NODE(fm, chunk_node); + } /* end while */ + } /* end else */ done: + /* Cleanup on failure */ + if (ret_value < 0) { + if (chunk_mem_spaces != chunk_mem_spaces_static) + chunk_mem_spaces = H5MM_xfree(chunk_mem_spaces); + if (chunk_file_spaces != chunk_file_spaces_static) + chunk_file_spaces = H5MM_xfree(chunk_file_spaces); + if (chunk_addrs != chunk_addrs_static) + chunk_addrs = H5MM_xfree(chunk_addrs); + } /* end if */ + + /* Make sure we cleaned up */ + HDassert(!chunk_mem_spaces || chunk_mem_spaces == chunk_mem_spaces_static); + HDassert(!chunk_file_spaces || chunk_file_spaces == chunk_file_spaces_static); + HDassert(!chunk_addrs || chunk_addrs == chunk_addrs_static); + FUNC_LEAVE_NOAPI(ret_value) } /* H5D__chunk_write() */ diff --git a/src/H5Dcompact.c b/src/H5Dcompact.c index fe41298..f68a93a 100644 --- a/src/H5Dcompact.c +++ b/src/H5Dcompact.c @@ -54,9 +54,8 @@ /* Layout operation callbacks */ static herr_t H5D__compact_construct(H5F_t *f, H5D_t *dset); static hbool_t H5D__compact_is_space_alloc(const H5O_storage_t *storage); -static herr_t H5D__compact_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, - H5D_chunk_map_t *cm); +static herr_t H5D__compact_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, + const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *cm); static ssize_t H5D__compact_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq, size_t dset_size_arr[], hsize_t dset_offset_arr[], size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_size_arr[], hsize_t mem_offset_arr[]); @@ -227,7 +226,7 @@ H5D__compact_is_space_alloc(const H5O_storage_t H5_ATTR_UNUSED *storage) *------------------------------------------------------------------------- */ static herr_t -H5D__compact_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, +H5D__compact_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space, const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm) { diff --git a/src/H5Dcontig.c b/src/H5Dcontig.c index 4dc6f72..d7ebbee 100644 --- a/src/H5Dcontig.c +++ b/src/H5Dcontig.c @@ -43,6 +43,7 @@ #include "H5FOprivate.h" /* File objects */ #include "H5Oprivate.h" /* Object headers */ #include "H5Pprivate.h" /* Property lists */ +#include "H5PBprivate.h" /* Page Buffer */ #include "H5VMprivate.h" /* Vector and array functions */ /****************/ @@ -90,9 +91,8 @@ typedef struct H5D_contig_writevv_ud_t { /* Layout operation callbacks */ static herr_t H5D__contig_construct(H5F_t *f, H5D_t *dset); static herr_t H5D__contig_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id); -static herr_t H5D__contig_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, - hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, - H5D_chunk_map_t *cm); +static herr_t H5D__contig_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, + const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *cm); static ssize_t H5D__contig_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq, size_t dset_len_arr[], hsize_t dset_offset_arr[], size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_len_arr[], hsize_t mem_offset_arr[]); @@ -103,6 +103,7 @@ static herr_t H5D__contig_flush(H5D_t *dset); /* Helper routines */ static herr_t H5D__contig_write_one(H5D_io_info_t *io_info, hsize_t offset, size_t size); +static htri_t H5D__contig_may_use_select_io(const H5D_io_info_t *io_info, H5D_io_op_type_t op_type); /*********************/ /* Package Variables */ @@ -549,19 +550,81 @@ H5D__contig_is_data_cached(const H5D_shared_t *shared_dset) *------------------------------------------------------------------------- */ static herr_t -H5D__contig_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, +H5D__contig_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space, const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm) { - FUNC_ENTER_STATIC_NOERR + htri_t use_selection_io = FALSE; /* Whether to use selection I/O */ + htri_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC io_info->store->contig.dset_addr = io_info->dset->shared->layout.storage.u.contig.addr; io_info->store->contig.dset_size = io_info->dset->shared->layout.storage.u.contig.size; - FUNC_LEAVE_NOAPI(SUCCEED) + /* Check if we're performing selection I/O */ + if ((use_selection_io = H5D__contig_may_use_select_io(io_info, H5D_IO_OP_READ)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if selection I/O is possible") + io_info->use_select_io = (hbool_t)use_selection_io; + +done: + FUNC_LEAVE_NOAPI(ret_value) } /* end H5D__contig_io_init() */ /*------------------------------------------------------------------------- + * Function: H5D__contig_may_use_select_io + * + * Purpose: A small internal function to if it may be possible to use + * selection I/O. + * + * Return: TRUE or FALSE + * + * Programmer: Neil Fortner + * 3 August 2021 + * + *------------------------------------------------------------------------- + */ +static htri_t +H5D__contig_may_use_select_io(const H5D_io_info_t *io_info, H5D_io_op_type_t op_type) +{ + const H5D_t *dataset = io_info->dset; /* Local pointer to dataset info */ + htri_t ret_value = FAIL; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity check */ + HDassert(io_info); + HDassert(dataset); + HDassert(op_type == H5D_IO_OP_READ || op_type == H5D_IO_OP_WRITE); + + /* Don't use selection I/O if it's globally disabled, if there is a type + * conversion, or if it's not a contiguous dataset, or if the sieve buffer + * exists (write) or is dirty (read) */ + if (!H5_use_selection_io_g || io_info->io_ops.single_read != H5D__select_read || + io_info->layout_ops.readvv != H5D__contig_readvv || + (op_type == H5D_IO_OP_READ && io_info->dset->shared->cache.contig.sieve_dirty) || + (op_type == H5D_IO_OP_WRITE && io_info->dset->shared->cache.contig.sieve_buf)) + ret_value = FALSE; + else { + htri_t page_buf_enabled; + + HDassert(io_info->io_ops.single_write == H5D__select_write); + HDassert(io_info->layout_ops.writevv == H5D__contig_writevv); + + /* Check if the page buffer is enabled */ + if ((page_buf_enabled = H5PB_enabled(io_info->f_sh, H5FD_MEM_DRAW)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't check if page buffer is enabled") + if (page_buf_enabled) + ret_value = FALSE; + else + ret_value = TRUE; + } /* end else */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5D__contig_may_use_select_io() */ + +/*------------------------------------------------------------------------- * Function: H5D__contig_read * * Purpose: Read from a contiguous dataset. @@ -577,7 +640,7 @@ herr_t H5D__contig_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *fm) { - herr_t ret_value = SUCCEED; /*return value */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_PACKAGE @@ -588,8 +651,20 @@ H5D__contig_read(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize HDassert(mem_space); HDassert(file_space); - /* Read data */ - if ((io_info->io_ops.single_read)(io_info, type_info, nelmts, file_space, mem_space) < 0) + if (io_info->use_select_io) { + size_t dst_type_size = type_info->dst_type_size; + + /* Issue selection I/O call (we can skip the page buffer because we've + * already verified it won't be used, and the metadata accumulator + * because this is raw data) */ + if (H5F_shared_select_read(H5F_SHARED(io_info->dset->oloc.file), H5FD_MEM_DRAW, nelmts > 0 ? 1 : 0, + &mem_space, &file_space, &(io_info->store->contig.dset_addr), + &dst_type_size, &(io_info->u.rbuf)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "contiguous selection read failed") + } /* end if */ + else + /* Read data through legacy (non-selection I/O) pathway */ + if ((io_info->io_ops.single_read)(io_info, type_info, nelmts, file_space, mem_space) < 0) HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "contiguous read failed") done: @@ -612,7 +687,7 @@ herr_t H5D__contig_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *fm) { - herr_t ret_value = SUCCEED; /*return value */ + herr_t ret_value = SUCCEED; /* Return value */ FUNC_ENTER_PACKAGE @@ -623,8 +698,20 @@ H5D__contig_write(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsiz HDassert(mem_space); HDassert(file_space); - /* Write data */ - if ((io_info->io_ops.single_write)(io_info, type_info, nelmts, file_space, mem_space) < 0) + if (io_info->use_select_io) { + size_t dst_type_size = type_info->dst_type_size; + + /* Issue selection I/O call (we can skip the page buffer because we've + * already verified it won't be used, and the metadata accumulator + * because this is raw data) */ + if (H5F_shared_select_write(H5F_SHARED(io_info->dset->oloc.file), H5FD_MEM_DRAW, nelmts > 0 ? 1 : 0, + &mem_space, &file_space, &(io_info->store->contig.dset_addr), + &dst_type_size, &(io_info->u.wbuf)) < 0) + HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "contiguous selection write failed") + } /* end if */ + else + /* Write data through legacy (non-selection I/O) pathway */ + if ((io_info->io_ops.single_write)(io_info, type_info, nelmts, file_space, mem_space) < 0) HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "contiguous write failed") done: diff --git a/src/H5Defl.c b/src/H5Defl.c index 85c9dba..ce6d481 100644 --- a/src/H5Defl.c +++ b/src/H5Defl.c @@ -60,9 +60,9 @@ typedef struct H5D_efl_writevv_ud_t { /********************/ /* Layout operation callbacks */ -static herr_t H5D__efl_construct(H5F_t *f, H5D_t *dset); -static herr_t H5D__efl_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, - const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *cm); +static herr_t H5D__efl_construct(H5F_t *f, H5D_t *dset); +static herr_t H5D__efl_io_init(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, + const H5S_t *file_space, const H5S_t *mem_space, H5D_chunk_map_t *cm); static ssize_t H5D__efl_readvv(const H5D_io_info_t *io_info, size_t dset_max_nseq, size_t *dset_curr_seq, size_t dset_len_arr[], hsize_t dset_offset_arr[], size_t mem_max_nseq, size_t *mem_curr_seq, size_t mem_len_arr[], hsize_t mem_offset_arr[]); @@ -197,7 +197,7 @@ H5D__efl_is_space_alloc(const H5O_storage_t H5_ATTR_UNUSED *storage) *------------------------------------------------------------------------- */ static herr_t -H5D__efl_io_init(const H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, +H5D__efl_io_init(H5D_io_info_t *io_info, const H5D_type_info_t H5_ATTR_UNUSED *type_info, hsize_t H5_ATTR_UNUSED nelmts, const H5S_t H5_ATTR_UNUSED *file_space, const H5S_t H5_ATTR_UNUSED *mem_space, H5D_chunk_map_t H5_ATTR_UNUSED *cm) { diff --git a/src/H5Dio.c b/src/H5Dio.c index 6bd4666..c245a5c 100644 --- a/src/H5Dio.c +++ b/src/H5Dio.c @@ -565,6 +565,10 @@ H5D__ioinfo_init(H5D_t *dset, const H5D_type_info_t *type_info, H5D_storage_t *s io_info->io_ops.single_write = H5D__scatgath_write; } /* end else */ + /* Start with selection I/O off, layout callback will turn it on if + * appropriate */ + io_info->use_select_io = FALSE; + #ifdef H5_HAVE_PARALLEL /* Determine if the file was opened with an MPI VFD */ io_info->using_mpi_vfd = H5F_HAS_FEATURE(dset->oloc.file, H5FD_FEAT_HAS_MPI); @@ -803,12 +807,17 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, const H5S_t *file_ /* Check if we can use the optimized parallel I/O routines */ if (opt == TRUE) { - /* Override the I/O op pointers to the MPI-specific routines */ - io_info->io_ops.multi_read = dset->shared->layout.ops->par_read; - io_info->io_ops.multi_write = dset->shared->layout.ops->par_write; - io_info->io_ops.single_read = H5D__mpio_select_read; - io_info->io_ops.single_write = H5D__mpio_select_write; - } /* end if */ + /* Override the I/O op pointers to the MPI-specific routines, unless + * selection I/O is to be used - in this case the file driver will + * handle collective I/O */ + /* Check for selection/vector support in file driver? -NAF */ + if (!io_info->use_select_io) { + io_info->io_ops.multi_read = dset->shared->layout.ops->par_read; + io_info->io_ops.multi_write = dset->shared->layout.ops->par_write; + io_info->io_ops.single_read = H5D__mpio_select_read; + io_info->io_ops.single_write = H5D__mpio_select_write; + } /* end if */ + } /* end if */ else { int comm_size = 0; diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h index e07ba30..e6b6143 100644 --- a/src/H5Dpkg.h +++ b/src/H5Dpkg.h @@ -121,9 +121,8 @@ typedef herr_t (*H5D_layout_construct_func_t)(H5F_t *f, H5D_t *dset); typedef herr_t (*H5D_layout_init_func_t)(H5F_t *f, const H5D_t *dset, hid_t dapl_id); typedef hbool_t (*H5D_layout_is_space_alloc_func_t)(const H5O_storage_t *storage); typedef hbool_t (*H5D_layout_is_data_cached_func_t)(const H5D_shared_t *shared_dset); -typedef herr_t (*H5D_layout_io_init_func_t)(const struct H5D_io_info_t *io_info, - const H5D_type_info_t *type_info, hsize_t nelmts, - const H5S_t *file_space, const H5S_t *mem_space, +typedef herr_t (*H5D_layout_io_init_func_t)(struct H5D_io_info_t *io_info, const H5D_type_info_t *type_info, + hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, struct H5D_chunk_map_t *cm); typedef herr_t (*H5D_layout_read_func_t)(struct H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t nelmts, const H5S_t *file_space, const H5S_t *mem_space, @@ -223,6 +222,7 @@ typedef struct H5D_io_info_t { H5D_layout_ops_t layout_ops; /* Dataset layout I/O operation function pointers */ H5D_io_ops_t io_ops; /* I/O operation function pointers */ H5D_io_op_type_t op_type; + hbool_t use_select_io; /* Whether to use selection I/O */ union { void * rbuf; /* Pointer to buffer for read */ const void *wbuf; /* Pointer to buffer to write */ @@ -1436,6 +1436,371 @@ done: } /* end H5FDwrite() */ /*------------------------------------------------------------------------- + * Function: H5FDread_vector + * + * Purpose: Perform count reads from the specified file at the offsets + * provided in the addrs array, with the lengths and memory + * types provided in the sizes and types arrays. Data read + * is returned in the buffers provided in the bufs array. + * + * All reads are done according to the data transfer property + * list dxpl_id (which may be the constant H5P_DEFAULT). + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: JRM -- 6/10/20 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5FDread_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], void *bufs[] /* out */) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE7("e", "*#iIu*Mt*a*zx", file, dxpl_id, count, types, addrs, sizes, bufs); + + /* Check arguments */ + if (!file) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file pointer cannot be NULL") + + if (!file->cls) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file class pointer cannot be NULL") + + if ((!types) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "types parameter can't be NULL if count is positive") + + if ((!addrs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "addrs parameter can't be NULL if count is positive") + + if ((!sizes) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes parameter can't be NULL if count is positive") + + if ((!bufs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs parameter can't be NULL if count is positive") + + if ((count > 0) && (sizes[0] == 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes[0] can't be 0") + + if ((count > 0) && (types[0] == H5FD_MEM_NOLIST)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "count[0] can't be H5FD_MEM_NOLIST") + + /* Get the default dataset transfer property list if the user + * didn't provide one + */ + if (H5P_DEFAULT == dxpl_id) { + dxpl_id = H5P_DATASET_XFER_DEFAULT; + } + else { + if (TRUE != H5P_isa_class(dxpl_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + } + + /* Set DXPL for operation */ + H5CX_set_dxpl(dxpl_id); + + /* Call private function */ + /* JRM -- review this */ + /* (Note compensating for base addresses addition in internal routine) */ + if (H5FD_read_vector(file, count, types, addrs, sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "file vector read request failed") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5FDread_vector() */ + +/*------------------------------------------------------------------------- + * Function: H5FDwrite_vector + * + * Purpose: Perform count writes to the specified file at the offsets + * provided in the addrs array, with the lengths and memory + * types provided in the sizes and types arrays. Data to be + * written is in the buffers provided in the bufs array. + * + * All writes are done according to the data transfer property + * list dxpl_id (which may be the constant H5P_DEFAULT). + * + * Return: Success: SUCCEED + * All writes have completed successfully + * + * Failure: FAIL + * One or more of the writes failed. + * + * Programmer: JRM -- 6/10/20 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5FDwrite_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], const void *bufs[] /* in */) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE7("e", "*#iIu*Mt*a*z**x", file, dxpl_id, count, types, addrs, sizes, bufs); + + /* Check arguments */ + if (!file) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file pointer cannot be NULL") + + if (!file->cls) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file class pointer cannot be NULL") + + if ((!types) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "types parameter can't be NULL if count is positive") + + if ((!addrs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "addrs parameter can't be NULL if count is positive") + + if ((!sizes) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes parameter can't be NULL if count is positive") + + if ((!bufs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs parameter can't be NULL if count is positive") + + if ((count > 0) && (sizes[0] == 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes[0] can't be 0") + + if ((count > 0) && (types[0] == H5FD_MEM_NOLIST)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "count[0] can't be H5FD_MEM_NOLIST") + + /* Get the default dataset transfer property list if the user didn't provide one */ + if (H5P_DEFAULT == dxpl_id) { + dxpl_id = H5P_DATASET_XFER_DEFAULT; + } + else { + if (TRUE != H5P_isa_class(dxpl_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + } + + /* Set DXPL for operation */ + H5CX_set_dxpl(dxpl_id); + + /* Call private function */ /* JRM -- review this */ + /* (Note compensating for base address addition in internal routine) */ + if (H5FD_write_vector(file, count, types, addrs, sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "file vector write request failed") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5FDwrite_vector() */ + +/*------------------------------------------------------------------------- + * Function: H5FDread_selection + * + * Purpose: Perform count reads from the specified file at the + * locations selected in the dataspaces in the file_spaces + * array, with each of those dataspaces starting at the file + * address specified by the corresponding element of the + * offsets array, and with the size of each element in the + * dataspace specified by the corresponding element of the + * element_sizes array. The memory type provided by type is + * the same for all selections. Data read is returned in + * the locations selected in the dataspaces in the + * mem_spaces array, within the buffers provided in the + * corresponding elements of the bufs array. + * + * If i > 0 and element_sizes[i] == 0, presume + * element_sizes[n] = element_sizes[i-1] for all n >= i and + * < count. + * + * If the underlying VFD supports selection reads, pass the + * call through directly. + * + * If it doesn't, convert the vector write into a sequence + * of individual reads. + * + * All reads are done according to the data transfer property + * list dxpl_id (which may be the constant H5P_DEFAULT). + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: NAF -- 5/19/21 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5FDread_selection(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], void *bufs[] /* out */) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE9("e", "*#MtiIu*i*i*a*zx", file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs); + + /* Check arguments */ + if (!file) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file pointer cannot be NULL") + + if (!file->cls) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file class pointer cannot be NULL") + + if ((!mem_space_ids) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "mem_spaces parameter can't be NULL if count is positive") + + if ((!file_space_ids) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file_spaces parameter can't be NULL if count is positive") + + if ((!offsets) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "offsets parameter can't be NULL if count is positive") + + if ((!element_sizes) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "element_sizes parameter can't be NULL if count is positive") + + if ((!bufs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs parameter can't be NULL if count is positive") + + if ((count > 0) && (element_sizes[0] == 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes[0] can't be 0") + + if ((count > 0) && (bufs[0] == NULL)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs[0] can't be NULL") + + /* Get the default dataset transfer property list if the user didn't provide one */ + if (H5P_DEFAULT == dxpl_id) { + dxpl_id = H5P_DATASET_XFER_DEFAULT; + } + else { + if (TRUE != H5P_isa_class(dxpl_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + } + + /* Set DXPL for operation */ + H5CX_set_dxpl(dxpl_id); + + /* Call private function */ + /* (Note compensating for base address addition in internal routine) */ + if (H5FD_read_selection_id(file, type, count, mem_space_ids, file_space_ids, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "file selection read request failed") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5FDread_selection() */ + +/*------------------------------------------------------------------------- + * Function: H5FDwrite_selection + * + * Purpose: Perform count writes to the specified file at the + * locations selected in the dataspaces in the file_spaces + * array, with each of those dataspaces starting at the file + * address specified by the corresponding element of the + * offsets array, and with the size of each element in the + * dataspace specified by the corresponding element of the + * element_sizes array. The memory type provided by type is + * the same for all selections. Data write is from + * the locations selected in the dataspaces in the + * mem_spaces array, within the buffers provided in the + * corresponding elements of the bufs array. + * + * If i > 0 and element_sizes[i] == 0, presume + * element_sizes[n] = element_sizes[i-1] for all n >= i and + * < count. + * + * If the underlying VFD supports selection reads, pass the + * call through directly. + * + * If it doesn't, convert the vector write into a sequence + * of individual writes. + * + * All writes are done according to the data transfer property + * list dxpl_id (which may be the constant H5P_DEFAULT). + * + * Return: Success: SUCCEED + * All writes have completed successfully + * + * Failure: FAIL + * One or more of the writes failed. + * + * Programmer: NAF -- 5/14/21 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +herr_t +H5FDwrite_selection(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], const void *bufs[]) +{ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE9("e", "*#MtiIu*i*i*a*z**x", file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs); + + /* Check arguments */ + if (!file) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file pointer cannot be NULL") + + if (!file->cls) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file class pointer cannot be NULL") + + if ((!mem_space_ids) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "mem_spaces parameter can't be NULL if count is positive") + + if ((!file_space_ids) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file_spaces parameter can't be NULL if count is positive") + + if ((!offsets) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "offsets parameter can't be NULL if count is positive") + + if ((!element_sizes) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "element_sizes parameter can't be NULL if count is positive") + + if ((!bufs) && (count > 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs parameter can't be NULL if count is positive") + + if ((count > 0) && (element_sizes[0] == 0)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "sizes[0] can't be 0") + + if ((count > 0) && (bufs[0] == NULL)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "bufs[0] can't be NULL") + + /* Get the default dataset transfer property list if the user didn't provide one */ + if (H5P_DEFAULT == dxpl_id) { + dxpl_id = H5P_DATASET_XFER_DEFAULT; + } + else { + if (TRUE != H5P_isa_class(dxpl_id, H5P_DATASET_XFER)) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data transfer property list") + } + + /* Set DXPL for operation */ + H5CX_set_dxpl(dxpl_id); + + /* Call private function */ + /* (Note compensating for base address addition in internal routine) */ + if (H5FD_write_selection_id(file, type, count, mem_space_ids, file_space_ids, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "file selection write request failed") + +done: + FUNC_LEAVE_API(ret_value) +} /* end H5FDwrite_selection() */ + +/*------------------------------------------------------------------------- * Function: H5FDflush * * Purpose: Notify driver to flush all cached data. If the driver has no @@ -1782,8 +2147,7 @@ H5FD_ctl(H5FD_t *file, uint64_t op_code, uint64_t flags, const void *input, void } else if (flags & H5FD_CTL__FAIL_IF_UNKNOWN_FLAG) { - HGOTO_ERROR(H5E_VFL, H5E_FCNTL, FAIL, - "VFD ctl request failed (no ctl callback and fail if unknown flag is set)") + HGOTO_ERROR(H5E_VFL, H5E_FCNTL, FAIL, "VFD ctl request failed (no ctl and fail if unknown flag is set)") } done: diff --git a/src/H5FDcore.c b/src/H5FDcore.c index 820c74f..9bf49ce 100644 --- a/src/H5FDcore.c +++ b/src/H5FDcore.c @@ -178,6 +178,10 @@ static const H5FD_class_t H5FD_core_g = { H5FD__core_get_handle, /* get_handle */ H5FD__core_read, /* read */ H5FD__core_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ H5FD__core_flush, /* flush */ H5FD__core_truncate, /* truncate */ H5FD__core_lock, /* lock */ diff --git a/src/H5FDdevelop.h b/src/H5FDdevelop.h index 4895658..bbfb8af 100644 --- a/src/H5FDdevelop.h +++ b/src/H5FDdevelop.h @@ -187,6 +187,16 @@ typedef struct H5FD_class_t { herr_t (*get_handle)(H5FD_t *file, hid_t fapl, void **file_handle); herr_t (*read)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl, haddr_t addr, size_t size, void *buffer); herr_t (*write)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl, haddr_t addr, size_t size, const void *buffer); + herr_t (*read_vector)(H5FD_t *file, hid_t dxpl, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], void *bufs[]); + herr_t (*write_vector)(H5FD_t *file, hid_t dxpl, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], const void *bufs[]); + herr_t (*read_selection)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, size_t count, hid_t mem_spaces[], + hid_t file_spaces[], haddr_t offsets[], size_t element_sizes[], + void *bufs[] /*out*/); + herr_t (*write_selection)(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, size_t count, hid_t mem_spaces[], + hid_t file_spaces[], haddr_t offsets[], size_t element_sizes[], + const void *bufs[] /*in*/); herr_t (*flush)(H5FD_t *file, hid_t dxpl_id, hbool_t closing); herr_t (*truncate)(H5FD_t *file, hid_t dxpl_id, hbool_t closing); herr_t (*lock)(H5FD_t *file, hbool_t rw); @@ -250,6 +260,16 @@ H5_DLL herr_t H5FDread(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t ad void *buf /*out*/); H5_DLL herr_t H5FDwrite(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf); +H5_DLL herr_t H5FDread_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[], + haddr_t addrs[], size_t sizes[], void *bufs[] /* out */); +H5_DLL herr_t H5FDwrite_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[], + haddr_t addrs[], size_t sizes[], const void *bufs[] /* in */); +H5_DLL herr_t H5FDread_selection(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, + hid_t mem_spaces[], hid_t file_spaces[], haddr_t offsets[], + size_t element_sizes[], void *bufs[] /* out */); +H5_DLL herr_t H5FDwrite_selection(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, + hid_t mem_spaces[], hid_t file_spaces[], haddr_t offsets[], + size_t element_sizes[], const void *bufs[]); H5_DLL herr_t H5FDflush(H5FD_t *file, hid_t dxpl_id, hbool_t closing); H5_DLL herr_t H5FDtruncate(H5FD_t *file, hid_t dxpl_id, hbool_t closing); H5_DLL herr_t H5FDlock(H5FD_t *file, hbool_t rw); diff --git a/src/H5FDdirect.c b/src/H5FDdirect.c index 7cca09f..869dc37 100644 --- a/src/H5FDdirect.c +++ b/src/H5FDdirect.c @@ -167,6 +167,10 @@ static const H5FD_class_t H5FD_direct_g = { H5FD__direct_get_handle, /* get_handle */ H5FD__direct_read, /* read */ H5FD__direct_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__direct_truncate, /* truncate */ H5FD__direct_lock, /* lock */ diff --git a/src/H5FDfamily.c b/src/H5FDfamily.c index af67c78..e45b52e 100644 --- a/src/H5FDfamily.c +++ b/src/H5FDfamily.c @@ -132,6 +132,10 @@ static const H5FD_class_t H5FD_family_g = { H5FD__family_get_handle, /* get_handle */ H5FD__family_read, /* read */ H5FD__family_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ H5FD__family_flush, /* flush */ H5FD__family_truncate, /* truncate */ H5FD__family_lock, /* lock */ diff --git a/src/H5FDhdfs.c b/src/H5FDhdfs.c index ac48b42..a1b9a39 100644 --- a/src/H5FDhdfs.c +++ b/src/H5FDhdfs.c @@ -305,6 +305,10 @@ static const H5FD_class_t H5FD_hdfs_g = { H5FD__hdfs_get_handle, /* get_handle */ H5FD__hdfs_read, /* read */ H5FD__hdfs_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__hdfs_truncate, /* truncate */ NULL, /* lock */ diff --git a/src/H5FDint.c b/src/H5FDint.c index f13f222..0ed49b0 100644 --- a/src/H5FDint.c +++ b/src/H5FDint.c @@ -40,10 +40,40 @@ /* Local Macros */ /****************/ +/* Length of sequence lists requested from dataspace selections */ +#define H5FD_SEQ_LIST_LEN 128 + /******************/ /* Local Typedefs */ /******************/ +/************************************************************************* + * + * H5FD_vsrt_tmp_t + * + * Structure used to store vector I/O request addresses and the associated + * indexes in the addrs[] array for the purpose of determine the sorted + * order. + * + * This is done by allocating an array of H5FD_vsrt_tmp_t of length + * count, loading it with the contents of the addrs[] array and the + * associated indicies, and then sorting it. + * + * This sorted array of H5FD_vsrt_tmp_t is then used to populate sorted + * versions of the types[], addrs[], sizes[] and bufs[] vectors. + * + * addr: haddr_t containing the value of addrs[i], + * + * index: integer containing the value of i used to obtain the + * value of the addr field from the addrs[] vector. + * + *************************************************************************/ + +typedef struct H5FD_vsrt_tmp_t { + haddr_t addr; + int index; +} H5FD_vsrt_tmp_t; + /********************/ /* Package Typedefs */ /********************/ @@ -244,6 +274,1647 @@ done: } /* end H5FD_write() */ /*------------------------------------------------------------------------- + * Function: H5FD_read_vector + * + * Purpose: Private version of H5FDread_vector() + * + * Perform count reads from the specified file at the offsets + * provided in the addrs array, with the lengths and memory + * types provided in the sizes and types arrays. Data read + * is returned in the buffers provided in the bufs array. + * + * If i > 0 and sizes[i] == 0, presume sizes[n] = sizes[i-1] + * for all n >= i and < count. + * + * Similarly, if i > 0 and types[i] == H5FD_MEM_NOLIST, + * presume types[n] = types[i-1] for all n >= i and < count. + * + * If the underlying VFD supports vector reads, pass the + * call through directly. + * + * If it doesn't, convert the vector read into a sequence + * of individual reads. + * + * Note that it is not in general possible to convert a + * vector read into a selection read, because each element + * in the vector read may have a different memory type. + * In contrast, selection reads are of a single type. + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: JRM -- 6/10/20 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_read_vector(H5FD_t *file, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], + void *bufs[] /* out */) +{ + hbool_t addrs_cooked = FALSE; + hbool_t extend_sizes = FALSE; + hbool_t extend_types = FALSE; + uint32_t i; + size_t size; + H5FD_mem_t type; + hid_t dxpl_id = H5I_INVALID_HID; /* DXPL for operation */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((types) || (count == 0)); + HDassert((addrs) || (count == 0)); + HDassert((sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* verify that the first elements of the sizes and types arrays are + * valid. + */ + HDassert((count == 0) || (sizes[0] != 0)); + HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) { + HGOTO_DONE(SUCCEED) + } +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the addrs array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + addrs[i] += file->base_addr; + } + addrs_cooked = TRUE; + } + + /* If the file is open for SWMR read access, allow access to data past + * the end of the allocated space (the 'eoa'). This is done because the + * eoa stored in the file's superblock might be out of sync with the + * objects being written within the file by the application performing + * SWMR write operations. + */ + if ((!(file->access_flags & H5F_ACC_SWMR_READ)) && (count > 0)) { + haddr_t eoa; + + extend_sizes = FALSE; + extend_types = FALSE; + + for (i = 0; i < count; i++) { + + if (!extend_sizes) { + + if (sizes[i] == 0) { + + extend_sizes = TRUE; + size = sizes[i - 1]; + } + else { + + size = sizes[i]; + } + } + + if (!extend_types) { + + if (types[i] == H5FD_MEM_NOLIST) { + + extend_types = TRUE; + type = types[i - 1]; + } + else { + + type = types[i]; + } + } + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + if ((addrs[i] + size) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, + "addr overflow, addrs[%d] = %llu, sizes[%d] = %llu, eoa = %llu", (int)i, + (unsigned long long)(addrs[i]), (int)i, (unsigned long long)size, + (unsigned long long)eoa) + } + } + + /* if the underlying VFD supports vector read, make the call */ + if (file->cls->read_vector) { + + if ((file->cls->read_vector)(file, dxpl_id, count, types, addrs, sizes, bufs) < 0) + + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read vector request failed") + } + else { + + /* otherwise, implement the vector read as a sequence of regular + * read calls. + */ + extend_sizes = FALSE; + extend_types = FALSE; + + for (i = 0; i < count; i++) { + + /* we have already verified that sizes[0] != 0 and + * types[0] != H5FD_MEM_NOLIST + */ + + if (!extend_sizes) { + + if (sizes[i] == 0) { + + extend_sizes = TRUE; + size = sizes[i - 1]; + } + else { + + size = sizes[i]; + } + } + + if (!extend_types) { + + if (types[i] == H5FD_MEM_NOLIST) { + + extend_types = TRUE; + type = types[i - 1]; + } + else { + + type = types[i]; + } + } + + if ((file->cls->read)(file, type, dxpl_id, addrs[i], size, bufs[i]) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed") + } + } + +done: + /* undo the base addr offset to the addrs array if necessary */ + if (addrs_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + addrs[i] -= file->base_addr; + } + } + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_read_vector() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_write_vector + * + * Purpose: Private version of H5FDwrite_vector() + * + * Perform count writes to the specified file at the offsets + * provided in the addrs array, with the lengths and memory + * types provided in the sizes and types arrays. Data written + * is taken from the buffers provided in the bufs array. + * + * If i > 0 and sizes[i] == 0, presume sizes[n] = sizes[i-1] + * for all n >= i and < count. + * + * Similarly, if i > 0 and types[i] == H5FD_MEM_NOLIST, + * presume types[n] = types[i-1] for all n >= i and < count. + * + * If the underlying VFD supports vector writes, pass the + * call through directly. + * + * If it doesn't, convert the vector write into a sequence + * of individual writes. + * + * Note that it is not in general possible to convert a + * vector write into a selection write, because each element + * in the vector read may have a different memory type. + * In contrast, selection writes are of a single type. + * + * Return: Success: SUCCEED + * All writes have completed successfully. + * + * Failure: FAIL + * One or more writes failed. + * + * Programmer: JRM -- 6/10/20 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_write_vector(H5FD_t *file, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], + const void *bufs[]) +{ + hbool_t addrs_cooked = FALSE; + hbool_t extend_sizes = FALSE; + hbool_t extend_types = FALSE; + uint32_t i; + size_t size; + H5FD_mem_t type; + hid_t dxpl_id; /* DXPL for operation */ + haddr_t eoa = HADDR_UNDEF; /* EOA for file */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((types) || (count == 0)); + HDassert((addrs) || (count == 0)); + HDassert((sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* verify that the first elements of the sizes and types arrays are + * valid. + */ + HDassert((count == 0) || (sizes[0] != 0)); + HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) + HGOTO_DONE(SUCCEED) +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the addrs array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + addrs[i] += file->base_addr; + } + addrs_cooked = TRUE; + } + + extend_sizes = FALSE; + extend_types = FALSE; + + for (i = 0; i < count; i++) { + + if (!extend_sizes) { + + if (sizes[i] == 0) { + + extend_sizes = TRUE; + size = sizes[i - 1]; + } + else { + + size = sizes[i]; + } + } + + if (!extend_types) { + + if (types[i] == H5FD_MEM_NOLIST) { + + extend_types = TRUE; + type = types[i - 1]; + } + else { + + type = types[i]; + } + } + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + if ((addrs[i] + size) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, addrs[%d] = %llu, sizes[%d] = %llu, \ + eoa = %llu", + (int)i, (unsigned long long)(addrs[i]), (int)i, (unsigned long long)size, + (unsigned long long)eoa) + } + + /* if the underlying VFD supports vector write, make the call */ + if (file->cls->write_vector) { + + if ((file->cls->write_vector)(file, dxpl_id, count, types, addrs, sizes, bufs) < 0) + + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write vector request failed") + } + else { + /* otherwise, implement the vector write as a sequence of regular + * write calls. + */ + extend_sizes = FALSE; + extend_types = FALSE; + + for (i = 0; i < count; i++) { + + /* we have already verified that sizes[0] != 0 and + * types[0] != H5FD_MEM_NOLIST + */ + + if (!extend_sizes) { + + if (sizes[i] == 0) { + + extend_sizes = TRUE; + size = sizes[i - 1]; + } + else { + + size = sizes[i]; + } + } + + if (!extend_types) { + + if (types[i] == H5FD_MEM_NOLIST) { + + extend_types = TRUE; + type = types[i - 1]; + } + else { + + type = types[i]; + } + } + + if ((file->cls->write)(file, type, dxpl_id, addrs[i], size, bufs[i]) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver write request failed") + } + } + +done: + /* undo the base addr offset to the addrs array if necessary */ + if (addrs_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + addrs[i] -= file->base_addr; + } + } + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_write_vector() */ + +/*------------------------------------------------------------------------- + * Function: H5FD__read_selection_translate + * + * Purpose: Translates a selection read call to a vector read call if + * vector reads are supported, or a series of scalar read + * calls otherwise. + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: NAF -- 5/13/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__read_selection_translate(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, + const H5S_t *const *mem_spaces, const H5S_t *const *file_spaces, + haddr_t offsets[], size_t element_sizes[], void *bufs[] /* out */) +{ + hbool_t extend_sizes = FALSE; + hbool_t extend_bufs = FALSE; + uint32_t i; + size_t element_size; + void * buf; + hbool_t use_vector = FALSE; + haddr_t addrs_static[8]; + haddr_t * addrs = addrs_static; + size_t sizes_static[8]; + size_t * sizes = sizes_static; + void * vec_bufs_static[8]; + void ** vec_bufs = vec_bufs_static; + hsize_t file_off[H5FD_SEQ_LIST_LEN]; + size_t file_len[H5FD_SEQ_LIST_LEN]; + hsize_t mem_off[H5FD_SEQ_LIST_LEN]; + size_t mem_len[H5FD_SEQ_LIST_LEN]; + size_t file_seq_i; + size_t mem_seq_i; + size_t file_nseq; + size_t mem_nseq; + size_t io_len; + size_t nelmts; + hssize_t hss_nelmts; + size_t seq_nelem; + H5S_sel_iter_t file_iter; + H5S_sel_iter_t mem_iter; + H5FD_mem_t types[2] = {type, H5FD_MEM_NOLIST}; + size_t vec_arr_nalloc = sizeof(addrs_static) / sizeof(addrs_static[0]); + size_t vec_arr_nused = 0; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert(vec_arr_nalloc == sizeof(sizes_static) / sizeof(sizes_static[0])); + HDassert(vec_arr_nalloc == sizeof(vec_bufs_static) / sizeof(vec_bufs_static[0])); + HDassert(mem_spaces); + HDassert(file_spaces); + HDassert(offsets); + HDassert(element_sizes); + HDassert(bufs); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert(element_sizes[0] != 0); + HDassert(bufs[0] != NULL); + + /* Check if we're using vector I/O */ + use_vector = file->cls->read_vector != NULL; + + /* Loop over dataspaces */ + for (i = 0; i < count; i++) { + + /* we have already verified that element_sizes[0] != 0 and bufs[0] + * != NULL */ + + if (!extend_sizes) { + + if (element_sizes[i] == 0) { + + extend_sizes = TRUE; + element_size = element_sizes[i - 1]; + } + else { + + element_size = element_sizes[i]; + } + } + + if (!extend_bufs) { + + if (bufs[i] == NULL) { + + extend_bufs = TRUE; + buf = bufs[i - 1]; + } + else { + + buf = bufs[i]; + } + } + + /* Initialize sequence lists for memory and file spaces */ + if (H5S_select_iter_init(&file_iter, file_spaces[i], element_size, 0) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "can't initialize sequence list for file space") + if (H5S_select_iter_init(&mem_iter, mem_spaces[i], element_size, 0) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "can't initialize sequence list for memory space") + + /* Get the number of elements in selection */ + if ((hss_nelmts = (hssize_t)H5S_GET_SELECT_NPOINTS(file_spaces[i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTCOUNT, FAIL, "can't get number of elements selected") + H5_CHECKED_ASSIGN(nelmts, size_t, hss_nelmts, hssize_t); + +#ifndef NDEBUG + /* Verify mem space has the same number of elements */ + { + if ((hss_nelmts = (hssize_t)H5S_GET_SELECT_NPOINTS(mem_spaces[i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTCOUNT, FAIL, "can't get number of elements selected") + HDassert((hssize_t)nelmts == hss_nelmts); + } +#endif /* NDEBUG */ + + /* Initialize values so sequence lists are retrieved on the first + * iteration */ + file_seq_i = H5FD_SEQ_LIST_LEN; + mem_seq_i = H5FD_SEQ_LIST_LEN; + file_nseq = 0; + mem_nseq = 0; + + /* Loop until all elements are processed */ + while (file_seq_i < file_nseq || nelmts > 0) { + /* Fill/refill file sequence list if necessary */ + if (file_seq_i == H5FD_SEQ_LIST_LEN) { + if (H5S_SELECT_ITER_GET_SEQ_LIST(&file_iter, H5FD_SEQ_LIST_LEN, SIZE_MAX, &file_nseq, + &seq_nelem, file_off, file_len) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_UNSUPPORTED, FAIL, "sequence length generation failed") + HDassert(file_nseq > 0); + + nelmts -= seq_nelem; + file_seq_i = 0; + } + HDassert(file_seq_i < file_nseq); + + /* Fill/refill memory sequence list if necessary */ + if (mem_seq_i == H5FD_SEQ_LIST_LEN) { + if (H5S_SELECT_ITER_GET_SEQ_LIST(&mem_iter, H5FD_SEQ_LIST_LEN, SIZE_MAX, &mem_nseq, + &seq_nelem, mem_off, mem_len) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_UNSUPPORTED, FAIL, "sequence length generation failed") + HDassert(mem_nseq > 0); + + mem_seq_i = 0; + } + HDassert(mem_seq_i < mem_nseq); + + /* Calculate length of this IO */ + io_len = MIN(file_len[file_seq_i], mem_len[mem_seq_i]); + + /* Check if we're using vector I/O */ + if (use_vector) { + /* Check if we need to extend the arrays */ + if (vec_arr_nused == vec_arr_nalloc) { + /* Check if we're using the static arrays */ + if (addrs == addrs_static) { + HDassert(sizes == sizes_static); + HDassert(vec_bufs == vec_bufs_static); + + /* Allocate dynamic arrays */ + if (NULL == (addrs = H5MM_malloc(sizeof(addrs_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for address list") + if (NULL == (sizes = H5MM_malloc(sizeof(sizes_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for size list") + if (NULL == (vec_bufs = H5MM_malloc(sizeof(vec_bufs_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for buffer list") + + /* Copy the existing data */ + (void)H5MM_memcpy(addrs, addrs_static, sizeof(addrs_static)); + (void)H5MM_memcpy(sizes, sizes_static, sizeof(sizes_static)); + (void)H5MM_memcpy(vec_bufs, vec_bufs_static, sizeof(vec_bufs_static)); + } + else { + void *tmp_ptr; + + /* Reallocate arrays */ + if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * sizeof(*addrs) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for address list") + addrs = tmp_ptr; + if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * sizeof(*sizes) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for size list") + sizes = tmp_ptr; + if (NULL == + (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * sizeof(*vec_bufs) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for buffer list") + vec_bufs = tmp_ptr; + } + + /* Record that we've doubled the array sizes */ + vec_arr_nalloc *= 2; + } + + /* Add this segment to vector read list */ + addrs[vec_arr_nused] = offsets[i] + file_off[file_seq_i]; + sizes[vec_arr_nused] = io_len; + vec_bufs[vec_arr_nused] = (void *)((uint8_t *)buf + mem_off[mem_seq_i]); + vec_arr_nused++; + } + else + /* Issue scalar read call */ + if ((file->cls->read)(file, type, dxpl_id, offsets[i] + file_off[file_seq_i], io_len, + (void *)((uint8_t *)buf + mem_off[mem_seq_i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed") + + /* Update file sequence */ + if (io_len == file_len[file_seq_i]) + file_seq_i++; + else { + file_off[file_seq_i] += io_len; + file_len[file_seq_i] -= io_len; + } + + /* Update memory sequence */ + if (io_len == mem_len[mem_seq_i]) + mem_seq_i++; + else { + mem_off[mem_seq_i] += io_len; + mem_len[mem_seq_i] -= io_len; + } + } + + if (mem_seq_i < mem_nseq) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "file selection terminated before memory selection") + + /* Terminate iterators */ + if (H5S_SELECT_ITER_RELEASE(&file_iter) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "can't release file selection iterator") + if (H5S_SELECT_ITER_RELEASE(&mem_iter) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "can't release memory selection iterator") + } + + /* Issue vector read call if appropriate */ + if (use_vector) { + H5_CHECK_OVERFLOW(vec_arr_nused, size_t, uint32_t) + if ((file->cls->read_vector)(file, dxpl_id, (uint32_t)vec_arr_nused, types, addrs, sizes, vec_bufs) < + 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read vector request failed") + } + +done: + /* Cleanup */ + if (use_vector) { + if (addrs != addrs_static) + addrs = H5MM_xfree(addrs); + if (sizes != sizes_static) + sizes = H5MM_xfree(sizes); + if (vec_bufs != vec_bufs_static) + vec_bufs = H5MM_xfree(vec_bufs); + } + + /* Make sure we cleaned up */ + HDassert(!addrs || addrs == addrs_static); + HDassert(!sizes || sizes == sizes_static); + HDassert(!vec_bufs || vec_bufs == vec_bufs_static); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD__read_selection_translate() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_read_selection + * + * Purpose: Private version of H5FDread_selection() + * + * Perform count reads from the specified file at the + * locations selected in the dataspaces in the file_spaces + * array, with each of those dataspaces starting at the file + * address specified by the corresponding element of the + * offsets array, and with the size of each element in the + * dataspace specified by the corresponding element of the + * element_sizes array. The memory type provided by type is + * the same for all selections. Data read is returned in + * the locations selected in the dataspaces in the + * mem_spaces array, within the buffers provided in the + * corresponding elements of the bufs array. + * + * If i > 0 and element_sizes[i] == 0, presume + * element_sizes[n] = element_sizes[i-1] for all n >= i and + * < count. + * + * If the underlying VFD supports selection reads, pass the + * call through directly. + * + * If it doesn't, convert the vector read into a sequence + * of individual reads. + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: NAF -- 3/29/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_read_selection(H5FD_t *file, H5FD_mem_t type, uint32_t count, const H5S_t *const *mem_spaces, + const H5S_t *const *file_spaces, haddr_t offsets[], size_t element_sizes[], + void *bufs[] /* out */) +{ + hbool_t offsets_cooked = FALSE; + hid_t mem_space_ids_static[8]; + hid_t * mem_space_ids = mem_space_ids_static; + hid_t file_space_ids_static[8]; + hid_t * file_space_ids = file_space_ids_static; + uint32_t num_spaces = 0; + hid_t dxpl_id = H5I_INVALID_HID; /* DXPL for operation */ + uint32_t i; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((mem_spaces) || (count == 0)); + HDassert((file_spaces) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert((count == 0) || (element_sizes[0] != 0)); + HDassert((count == 0) || (bufs[0] != NULL)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) { + HGOTO_DONE(SUCCEED) + } +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the offsets array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + offsets[i] += file->base_addr; + } + offsets_cooked = TRUE; + } + + /* If the file is open for SWMR read access, allow access to data past + * the end of the allocated space (the 'eoa'). This is done because the + * eoa stored in the file's superblock might be out of sync with the + * objects being written within the file by the application performing + * SWMR write operations. + */ + /* For now at least, only check that the offset is not past the eoa, since + * looking into the highest offset in the selection (different from the + * bounds) is potentially expensive. + */ + if (!(file->access_flags & H5F_ACC_SWMR_READ)) { + haddr_t eoa; + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + for (i = 0; i < count; i++) { + + if ((offsets[i]) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, offsets[%d] = %llu, eoa = %llu", + (int)i, (unsigned long long)(offsets[i]), (unsigned long long)eoa) + } + } + + /* if the underlying VFD supports selection read, make the call */ + if (file->cls->read_selection) { + /* Allocate array of space IDs if necessary, otherwise use static + * buffers */ + if (count > sizeof(mem_space_ids_static) / sizeof(mem_space_ids_static[0])) { + if (NULL == (mem_space_ids = H5MM_malloc(count * sizeof(hid_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + if (NULL == (file_space_ids = H5MM_malloc(count * sizeof(hid_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + } + + /* Create IDs for all dataspaces */ + for (; num_spaces < count; num_spaces++) { + if ((mem_space_ids[num_spaces] = H5I_register(H5I_DATASPACE, mem_spaces[num_spaces], TRUE)) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTREGISTER, FAIL, "unable to register dataspace ID") + + if ((file_space_ids[num_spaces] = H5I_register(H5I_DATASPACE, file_spaces[num_spaces], TRUE)) < + 0) { + if (H5I_dec_app_ref(mem_space_ids[num_spaces]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + HGOTO_ERROR(H5E_VFL, H5E_CANTREGISTER, FAIL, "unable to register dataspace ID") + } + } + + if ((file->cls->read_selection)(file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read selection request failed") + } + else + /* Otherwise, implement the selection read as a sequence of regular + * or vector read calls. + */ + if (H5FD__read_selection_translate(file, type, dxpl_id, count, mem_spaces, file_spaces, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "translation to vector or scalar read failed") + +done: + /* undo the base addr offset to the offsets array if necessary */ + if (offsets_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + offsets[i] -= file->base_addr; + } + } + + /* Cleanup dataspace arrays */ + for (i = 0; i < num_spaces; i++) { + if (H5I_dec_app_ref(mem_space_ids[i]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + if (H5I_dec_app_ref(file_space_ids[i]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + } + if (mem_space_ids != mem_space_ids_static) + mem_space_ids = H5MM_xfree(mem_space_ids); + if (file_space_ids != file_space_ids_static) + file_space_ids = H5MM_xfree(file_space_ids); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_read_selection() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_read_selection_id + * + * Purpose: Like H5FD_read_selection(), but takes hid_t arrays instead + * of H5S_t * arrays for the dataspaces. + * + * Return: Success: SUCCEED + * All reads have completed successfully, and + * the results havce been into the supplied + * buffers. + * + * Failure: FAIL + * The contents of supplied buffers are undefined. + * + * Programmer: NAF -- 5/19/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_read_selection_id(H5FD_t *file, H5FD_mem_t type, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], + void *bufs[] /* out */) +{ + hbool_t offsets_cooked = FALSE; + H5S_t * mem_spaces_static[8]; + H5S_t ** mem_spaces = mem_spaces_static; + H5S_t * file_spaces_static[8]; + H5S_t ** file_spaces = file_spaces_static; + hid_t dxpl_id = H5I_INVALID_HID; /* DXPL for operation */ + uint32_t i; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((mem_space_ids) || (count == 0)); + HDassert((file_space_ids) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert((count == 0) || (element_sizes[0] != 0)); + HDassert((count == 0) || (bufs[0] != NULL)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) { + HGOTO_DONE(SUCCEED) + } +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the offsets array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + offsets[i] += file->base_addr; + } + offsets_cooked = TRUE; + } + + /* If the file is open for SWMR read access, allow access to data past + * the end of the allocated space (the 'eoa'). This is done because the + * eoa stored in the file's superblock might be out of sync with the + * objects being written within the file by the application performing + * SWMR write operations. + */ + /* For now at least, only check that the offset is not past the eoa, since + * looking into the highest offset in the selection (different from the + * bounds) is potentially expensive. + */ + if (!(file->access_flags & H5F_ACC_SWMR_READ)) { + haddr_t eoa; + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + for (i = 0; i < count; i++) { + + if ((offsets[i]) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, offsets[%d] = %llu, eoa = %llu", + (int)i, (unsigned long long)(offsets[i]), (unsigned long long)eoa) + } + } + + /* if the underlying VFD supports selection read, make the call */ + if (file->cls->read_selection) { + if ((file->cls->read_selection)(file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read selection request failed") + } + else { + /* Otherwise, implement the selection read as a sequence of regular + * or vector read calls. + */ + + /* Allocate arrays of space objects if necessary, otherwise use static + * buffers */ + if (count > sizeof(mem_spaces_static) / sizeof(mem_spaces_static[0])) { + if (NULL == (mem_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + if (NULL == (file_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + } + + /* Get object pointers for all dataspaces */ + for (i = 0; i < count; i++) { + if (NULL == (mem_spaces[i] = (H5S_t *)H5I_object_verify(mem_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, "can't retrieve memory dataspace from ID") + if (NULL == (file_spaces[i] = (H5S_t *)H5I_object_verify(file_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, "can't retrieve file dataspace from ID") + } + + /* Translate to vector or scalar I/O */ + if (H5FD__read_selection_translate(file, type, dxpl_id, count, (const H5S_t *const *)mem_spaces, + (const H5S_t *const *)file_spaces, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "translation to vector or scalar read failed") + } + +done: + /* undo the base addr offset to the offsets array if necessary */ + if (offsets_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + offsets[i] -= file->base_addr; + } + } + + /* Cleanup dataspace arrays */ + if (mem_spaces != mem_spaces_static) + mem_spaces = H5MM_xfree(mem_spaces); + if (file_spaces != file_spaces_static) + file_spaces = H5MM_xfree(file_spaces); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_read_selection_id() */ + +/*------------------------------------------------------------------------- + * Function: H5FD__write_selection_translate + * + * Purpose: Translates a selection write call to a vector write call + * if vector writes are supported, or a series of scalar + * write calls otherwise. + * + * Return: Success: SUCCEED + * All writes have completed successfully. + * + * Failure: FAIL + * One or more writes failed. + * + * Programmer: NAF -- 5/13/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__write_selection_translate(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, uint32_t count, + const H5S_t *const *mem_spaces, const H5S_t *const *file_spaces, + haddr_t offsets[], size_t element_sizes[], const void *bufs[]) +{ + hbool_t extend_sizes = FALSE; + hbool_t extend_bufs = FALSE; + uint32_t i; + size_t element_size; + const void * buf; + hbool_t use_vector = FALSE; + haddr_t addrs_static[8]; + haddr_t * addrs = addrs_static; + size_t sizes_static[8]; + size_t * sizes = sizes_static; + const void * vec_bufs_static[8]; + const void ** vec_bufs = vec_bufs_static; + hsize_t file_off[H5FD_SEQ_LIST_LEN]; + size_t file_len[H5FD_SEQ_LIST_LEN]; + hsize_t mem_off[H5FD_SEQ_LIST_LEN]; + size_t mem_len[H5FD_SEQ_LIST_LEN]; + size_t file_seq_i; + size_t mem_seq_i; + size_t file_nseq; + size_t mem_nseq; + size_t io_len; + size_t nelmts; + hssize_t hss_nelmts; + size_t seq_nelem; + H5S_sel_iter_t file_iter; + H5S_sel_iter_t mem_iter; + H5FD_mem_t types[2] = {type, H5FD_MEM_NOLIST}; + size_t vec_arr_nalloc = sizeof(addrs_static) / sizeof(addrs_static[0]); + size_t vec_arr_nused = 0; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert(vec_arr_nalloc == sizeof(sizes_static) / sizeof(sizes_static[0])); + HDassert(vec_arr_nalloc == sizeof(vec_bufs_static) / sizeof(vec_bufs_static[0])); + HDassert(mem_spaces); + HDassert(file_spaces); + HDassert(offsets); + HDassert(element_sizes); + HDassert(bufs); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert(element_sizes[0] != 0); + HDassert(bufs[0] != NULL); + + /* Check if we're using vector I/O */ + use_vector = file->cls->write_vector != NULL; + + /* Loop over dataspaces */ + for (i = 0; i < count; i++) { + + /* we have already verified that element_sizes[0] != 0 and bufs[0] + * != NULL */ + + if (!extend_sizes) { + + if (element_sizes[i] == 0) { + + extend_sizes = TRUE; + element_size = element_sizes[i - 1]; + } + else { + + element_size = element_sizes[i]; + } + } + + if (!extend_bufs) { + + if (bufs[i] == NULL) { + + extend_bufs = TRUE; + buf = bufs[i - 1]; + } + else { + + buf = bufs[i]; + } + } + + /* Initialize sequence lists for memory and file spaces */ + if (H5S_select_iter_init(&file_iter, file_spaces[i], element_size, 0) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "can't initialize sequence list for file space") + if (H5S_select_iter_init(&mem_iter, mem_spaces[i], element_size, 0) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "can't initialize sequence list for memory space") + + /* Get the number of elements in selection */ + if ((hss_nelmts = (hssize_t)H5S_GET_SELECT_NPOINTS(file_spaces[i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTCOUNT, FAIL, "can't get number of elements selected") + H5_CHECKED_ASSIGN(nelmts, size_t, hss_nelmts, hssize_t); + +#ifndef NDEBUG + /* Verify mem space has the same number of elements */ + { + if ((hss_nelmts = (hssize_t)H5S_GET_SELECT_NPOINTS(mem_spaces[i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTCOUNT, FAIL, "can't get number of elements selected") + HDassert((hssize_t)nelmts == hss_nelmts); + } +#endif /* NDEBUG */ + + /* Initialize values so sequence lists are retrieved on the first + * iteration */ + file_seq_i = H5FD_SEQ_LIST_LEN; + mem_seq_i = H5FD_SEQ_LIST_LEN; + file_nseq = 0; + mem_nseq = 0; + + /* Loop until all elements are processed */ + while (file_seq_i < file_nseq || nelmts > 0) { + /* Fill/refill file sequence list if necessary */ + if (file_seq_i == H5FD_SEQ_LIST_LEN) { + if (H5S_SELECT_ITER_GET_SEQ_LIST(&file_iter, H5FD_SEQ_LIST_LEN, SIZE_MAX, &file_nseq, + &seq_nelem, file_off, file_len) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_UNSUPPORTED, FAIL, "sequence length generation failed") + HDassert(file_nseq > 0); + + nelmts -= seq_nelem; + file_seq_i = 0; + } + HDassert(file_seq_i < file_nseq); + + /* Fill/refill memory sequence list if necessary */ + if (mem_seq_i == H5FD_SEQ_LIST_LEN) { + if (H5S_SELECT_ITER_GET_SEQ_LIST(&mem_iter, H5FD_SEQ_LIST_LEN, SIZE_MAX, &mem_nseq, + &seq_nelem, mem_off, mem_len) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_UNSUPPORTED, FAIL, "sequence length generation failed") + HDassert(mem_nseq > 0); + + mem_seq_i = 0; + } + HDassert(mem_seq_i < mem_nseq); + + /* Calculate length of this IO */ + io_len = MIN(file_len[file_seq_i], mem_len[mem_seq_i]); + + /* Check if we're using vector I/O */ + if (use_vector) { + /* Check if we need to extend the arrays */ + if (vec_arr_nused == vec_arr_nalloc) { + /* Check if we're using the static arrays */ + if (addrs == addrs_static) { + HDassert(sizes == sizes_static); + HDassert(vec_bufs == vec_bufs_static); + + /* Allocate dynamic arrays */ + if (NULL == (addrs = H5MM_malloc(sizeof(addrs_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for address list") + if (NULL == (sizes = H5MM_malloc(sizeof(sizes_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for size list") + if (NULL == (vec_bufs = H5MM_malloc(sizeof(vec_bufs_static) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory allocation failed for buffer list") + + /* Copy the existing data */ + (void)H5MM_memcpy(addrs, addrs_static, sizeof(addrs_static)); + (void)H5MM_memcpy(sizes, sizes_static, sizeof(sizes_static)); + (void)H5MM_memcpy(vec_bufs, vec_bufs_static, sizeof(vec_bufs_static)); + } + else { + void *tmp_ptr; + + /* Reallocate arrays */ + if (NULL == (tmp_ptr = H5MM_realloc(addrs, vec_arr_nalloc * sizeof(*addrs) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for address list") + addrs = tmp_ptr; + if (NULL == (tmp_ptr = H5MM_realloc(sizes, vec_arr_nalloc * sizeof(*sizes) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for size list") + sizes = tmp_ptr; + if (NULL == + (tmp_ptr = H5MM_realloc(vec_bufs, vec_arr_nalloc * sizeof(*vec_bufs) * 2))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "memory reallocation failed for buffer list") + vec_bufs = tmp_ptr; + } + + /* Record that we've doubled the array sizes */ + vec_arr_nalloc *= 2; + } + + /* Add this segment to vector write list */ + addrs[vec_arr_nused] = offsets[i] + file_off[file_seq_i]; + sizes[vec_arr_nused] = io_len; + vec_bufs[vec_arr_nused] = (const void *)((const uint8_t *)buf + mem_off[mem_seq_i]); + vec_arr_nused++; + } + else + /* Issue scalar write call */ + if ((file->cls->write)(file, type, dxpl_id, offsets[i] + file_off[file_seq_i], io_len, + (const void *)((const uint8_t *)buf + mem_off[mem_seq_i])) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write request failed") + + /* Update file sequence */ + if (io_len == file_len[file_seq_i]) + file_seq_i++; + else { + file_off[file_seq_i] += io_len; + file_len[file_seq_i] -= io_len; + } + + /* Update memory sequence */ + if (io_len == mem_len[mem_seq_i]) + mem_seq_i++; + else { + mem_off[mem_seq_i] += io_len; + mem_len[mem_seq_i] -= io_len; + } + } + + if (mem_seq_i < mem_nseq) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADVALUE, FAIL, "file selection terminated before memory selection") + + /* Terminate iterators */ + if (H5S_SELECT_ITER_RELEASE(&file_iter) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "can't release file selection iterator") + if (H5S_SELECT_ITER_RELEASE(&mem_iter) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_CANTFREE, FAIL, "can't release memory selection iterator") + } + + /* Issue vector write call if appropriate */ + if (use_vector) { + H5_CHECK_OVERFLOW(vec_arr_nused, size_t, uint32_t) + if ((file->cls->write_vector)(file, dxpl_id, (uint32_t)vec_arr_nused, types, addrs, sizes, vec_bufs) < + 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write vector request failed") + } + +done: + /* Cleanup */ + if (use_vector) { + if (addrs != addrs_static) + addrs = H5MM_xfree(addrs); + if (sizes != sizes_static) + sizes = H5MM_xfree(sizes); + if (vec_bufs != vec_bufs_static) + vec_bufs = H5MM_xfree(vec_bufs); + } + + /* Make sure we cleaned up */ + HDassert(!addrs || addrs == addrs_static); + HDassert(!sizes || sizes == sizes_static); + HDassert(!vec_bufs || vec_bufs == vec_bufs_static); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD__write_selection_translate() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_write_selection + * + * Purpose: Private version of H5FDwrite_selection() + * + * Perform count writes to the specified file at the + * locations selected in the dataspaces in the file_spaces + * array, with each of those dataspaces starting at the file + * address specified by the corresponding element of the + * offsets array, and with the size of each element in the + * dataspace specified by the corresponding element of the + * element_sizes array. The memory type provided by type is + * the same for all selections. Data write is from + * the locations selected in the dataspaces in the + * mem_spaces array, within the buffers provided in the + * corresponding elements of the bufs array. + * + * If i > 0 and element_sizes[i] == 0, presume + * element_sizes[n] = element_sizes[i-1] for all n >= i and + * < count. + * + * If the underlying VFD supports selection reads, pass the + * call through directly. + * + * If it doesn't, convert the vector write into a sequence + * of individual writes. + * + * Return: Success: SUCCEED + * All writes have completed successfully. + * + * Failure: FAIL + * One or more writes failed. + * + * Programmer: NAF -- 3/29/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_write_selection(H5FD_t *file, H5FD_mem_t type, uint32_t count, const H5S_t *const *mem_spaces, + const H5S_t *const *file_spaces, haddr_t offsets[], size_t element_sizes[], + const void *bufs[]) +{ + hbool_t offsets_cooked = FALSE; + hid_t mem_space_ids_static[8]; + hid_t * mem_space_ids = mem_space_ids_static; + hid_t file_space_ids_static[8]; + hid_t * file_space_ids = file_space_ids_static; + uint32_t num_spaces = 0; + hid_t dxpl_id = H5I_INVALID_HID; /* DXPL for operation */ + uint32_t i; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((mem_spaces) || (count == 0)); + HDassert((file_spaces) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert((count == 0) || (element_sizes[0] != 0)); + HDassert((count == 0) || (bufs[0] != NULL)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) { + HGOTO_DONE(SUCCEED) + } +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the offsets array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + offsets[i] += file->base_addr; + } + offsets_cooked = TRUE; + } + + /* For now at least, only check that the offset is not past the eoa, since + * looking into the highest offset in the selection (different from the + * bounds) is potentially expensive. + */ + { + haddr_t eoa; + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + for (i = 0; i < count; i++) { + + if ((offsets[i]) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, offsets[%d] = %llu, eoa = %llu", + (int)i, (unsigned long long)(offsets[i]), (unsigned long long)eoa) + } + } + + /* if the underlying VFD supports selection write, make the call */ + if (file->cls->write_selection) { + /* Allocate array of space IDs if necessary, otherwise use static + * buffers */ + if (count > sizeof(mem_space_ids_static) / sizeof(mem_space_ids_static[0])) { + if (NULL == (mem_space_ids = H5MM_malloc(count * sizeof(hid_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + if (NULL == (file_space_ids = H5MM_malloc(count * sizeof(hid_t)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + } + + /* Create IDs for all dataspaces */ + for (; num_spaces < count; num_spaces++) { + if ((mem_space_ids[num_spaces] = H5I_register(H5I_DATASPACE, mem_spaces[num_spaces], TRUE)) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTREGISTER, FAIL, "unable to register dataspace ID") + + if ((file_space_ids[num_spaces] = H5I_register(H5I_DATASPACE, file_spaces[num_spaces], TRUE)) < + 0) { + if (H5I_dec_app_ref(mem_space_ids[num_spaces]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + HGOTO_ERROR(H5E_VFL, H5E_CANTREGISTER, FAIL, "unable to register dataspace ID") + } + } + + if ((file->cls->write_selection)(file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write selection request failed") + } + else + /* Otherwise, implement the selection write as a sequence of regular + * or vector write calls. + */ + if (H5FD__write_selection_translate(file, type, dxpl_id, count, mem_spaces, file_spaces, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "translation to vector or scalar write failed") + +done: + /* undo the base addr offset to the offsets array if necessary */ + if (offsets_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + offsets[i] -= file->base_addr; + } + } + + /* Cleanup dataspace arrays */ + for (i = 0; i < num_spaces; i++) { + if (H5I_dec_app_ref(mem_space_ids[i]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + if (H5I_dec_app_ref(file_space_ids[i]) < 0) + HDONE_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "problem freeing id") + } + if (mem_space_ids != mem_space_ids_static) + mem_space_ids = H5MM_xfree(mem_space_ids); + if (file_space_ids != file_space_ids_static) + file_space_ids = H5MM_xfree(file_space_ids); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_write_selection() */ + +/*------------------------------------------------------------------------- + * Function: H5FD_write_selection_id + * + * Purpose: Like H5FD_write_selection(), but takes hid_t arrays + * instead of H5S_t * arrays for the dataspaces. + * + * Return: Success: SUCCEED + * All writes have completed successfully. + * + * Failure: FAIL + * One or more writes failed. + * + * Programmer: NAF -- 5/19/21 + * + * Changes: None + * + *------------------------------------------------------------------------- + */ +herr_t +H5FD_write_selection_id(H5FD_t *file, H5FD_mem_t type, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], const void *bufs[]) +{ + hbool_t offsets_cooked = FALSE; + H5S_t * mem_spaces_static[8]; + H5S_t ** mem_spaces = mem_spaces_static; + H5S_t * file_spaces_static[8]; + H5S_t ** file_spaces = file_spaces_static; + hid_t dxpl_id = H5I_INVALID_HID; /* DXPL for operation */ + uint32_t i; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(file); + HDassert(file->cls); + HDassert((mem_space_ids) || (count == 0)); + HDassert((file_space_ids) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Verify that the first elements of the element_sizes and bufs arrays are + * valid. */ + HDassert((count == 0) || (element_sizes[0] != 0)); + HDassert((count == 0) || (bufs[0] != NULL)); + + /* Get proper DXPL for I/O */ + dxpl_id = H5CX_get_dxpl(); + +#ifndef H5_HAVE_PARALLEL + /* The no-op case + * + * Do not return early for Parallel mode since the I/O could be a + * collective transfer. + */ + if (0 == count) { + HGOTO_DONE(SUCCEED) + } +#endif /* H5_HAVE_PARALLEL */ + + if (file->base_addr > 0) { + + /* apply the base_addr offset to the offsets array. Must undo before + * we return. + */ + for (i = 0; i < count; i++) { + + offsets[i] += file->base_addr; + } + offsets_cooked = TRUE; + } + + /* For now at least, only check that the offset is not past the eoa, since + * looking into the highest offset in the selection (different from the + * bounds) is potentially expensive. + */ + { + haddr_t eoa; + + if (HADDR_UNDEF == (eoa = (file->cls->get_eoa)(file, type))) + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "driver get_eoa request failed") + + for (i = 0; i < count; i++) { + + if ((offsets[i]) > eoa) + + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, offsets[%d] = %llu, eoa = %llu", + (int)i, (unsigned long long)(offsets[i]), (unsigned long long)eoa) + } + } + + /* if the underlying VFD supports selection write, make the call */ + if (file->cls->write_selection) { + if ((file->cls->write_selection)(file, type, dxpl_id, count, mem_space_ids, file_space_ids, offsets, + element_sizes, bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write selection request failed") + } + else { + /* Otherwise, implement the selection write as a sequence of regular + * or vector write calls. + */ + + /* Allocate arrays of space objects if necessary, otherwise use static + * buffers */ + if (count > sizeof(mem_spaces_static) / sizeof(mem_spaces_static[0])) { + if (NULL == (mem_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + if (NULL == (file_spaces = H5MM_malloc(count * sizeof(H5S_t *)))) + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "memory allocation failed for dataspace list") + } + + /* Get object pointers for all dataspaces */ + for (i = 0; i < count; i++) { + if (NULL == (mem_spaces[i] = (H5S_t *)H5I_object_verify(mem_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, "can't retrieve memory dataspace from ID") + if (NULL == (file_spaces[i] = (H5S_t *)H5I_object_verify(file_space_ids[i], H5I_DATASPACE))) + HGOTO_ERROR(H5E_VFL, H5E_BADTYPE, H5I_INVALID_HID, "can't retrieve file dataspace from ID") + } + + /* Translate to vector or scalar I/O */ + if (H5FD__write_selection_translate(file, type, dxpl_id, count, (const H5S_t *const *)mem_spaces, + (const H5S_t *const *)file_spaces, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "translation to vector or scalar write failed") + } + +done: + /* undo the base addr offset to the offsets array if necessary */ + if (offsets_cooked) { + + HDassert(file->base_addr > 0); + + for (i = 0; i < count; i++) { + + offsets[i] -= file->base_addr; + } + } + + /* Cleanup dataspace arrays */ + if (mem_spaces != mem_spaces_static) + mem_spaces = H5MM_xfree(mem_spaces); + if (file_spaces != file_spaces_static) + file_spaces = H5MM_xfree(file_spaces); + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD_write_selection_id() */ + +/*------------------------------------------------------------------------- * Function: H5FD_set_eoa * * Purpose: Private version of H5FDset_eoa() @@ -383,6 +2054,262 @@ H5FD_driver_query(const H5FD_class_t *driver, unsigned long *flags /*out*/) } /* end H5FD_driver_query() */ /*------------------------------------------------------------------------- + * Function: H5FD_sort_vector_io_req + * + * Purpose: Determine whether the supplied vector I/O request is + * sorted. + * + * if is is, set *vector_was_sorted to TRUE, set: + * + * *s_types_ptr = types + * *s_addrs_ptr = addrs + * *s_sizes_ptr = sizes + * *s_bufs_ptr = bufs + * + * and return. + * + * If it is not sorted, duplicate the type, addrs, sizes, + * and bufs vectors, storing the base addresses of the new + * vectors in *s_types_ptr, *s_addrs_ptr, *s_sizes_ptr, and + * *s_bufs_ptr respectively. Determine the sorted order + * of the vector I/O request, and load it into the new + * vectors in sorted order. + * + * Note that in this case, it is the callers responsibility + * to free the sorted vectors. + * + * JRM -- 3/15/21 + * + * Return: SUCCEED/FAIL + * + *------------------------------------------------------------------------- + */ + +static int +H5FD__vsrt_tmp_cmp(const void *element_1, const void *element_2) +{ + haddr_t addr_1 = ((const H5FD_vsrt_tmp_t *)element_1)->addr; + haddr_t addr_2 = ((const H5FD_vsrt_tmp_t *)element_2)->addr; + int ret_value = 0; /* Return value */ + + FUNC_ENTER_STATIC_NOERR + + /* Sanity checks */ + HDassert(H5F_addr_defined(addr_1)); + HDassert(H5F_addr_defined(addr_2)); + + if (H5F_addr_gt(addr_1, addr_2)) { + + ret_value = 1; + } + else if (H5F_addr_lt(addr_1, addr_2)) { + + ret_value = -1; + } + + FUNC_LEAVE_NOAPI(ret_value) +} /* H5FD__vsrt_tmp_cmp() */ + +herr_t +H5FD_sort_vector_io_req(hbool_t *vector_was_sorted, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], const void *bufs[], H5FD_mem_t **s_types_ptr, haddr_t **s_addrs_ptr, + size_t **s_sizes_ptr, void ***s_bufs_ptr) +{ + herr_t ret_value = SUCCEED; /* Return value */ + int i; + struct H5FD_vsrt_tmp_t *srt_tmp = NULL; + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + + HDassert(vector_was_sorted); + + HDassert((types) || (count == 0)); + HDassert((addrs) || (count == 0)); + HDassert((sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* verify that the first elements of the sizes and types arrays are + * valid. + */ + HDassert((count == 0) || (sizes[0] != 0)); + HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST)); + + HDassert((count == 0) || ((s_types_ptr) && (NULL == *s_types_ptr))); + HDassert((count == 0) || ((s_addrs_ptr) && (NULL == *s_addrs_ptr))); + HDassert((count == 0) || ((s_sizes_ptr) && (NULL == *s_sizes_ptr))); + HDassert((count == 0) || ((s_bufs_ptr) && (NULL == *s_bufs_ptr))); + + *vector_was_sorted = TRUE; + + /* if count <= 1, vector is sorted by definition */ + if (count > 1) { + + /* scan the addrs array to see if it is sorted */ + i = 1; + + while ((*vector_was_sorted) && (i < (int)(count - 1))) { + + if (H5F_addr_gt(addrs[i - 1], addrs[i])) { + + *vector_was_sorted = FALSE; + } + else if (H5F_addr_eq(addrs[i - 1], addrs[i])) { + + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "duplicate addr in vector") + } + i++; + } + } + + if (*vector_was_sorted) { + + *s_types_ptr = types; + *s_addrs_ptr = addrs; + *s_sizes_ptr = sizes; + *s_bufs_ptr = bufs; + } + else { + + /* must sort the addrs array in increasing addr order, while + * maintaining the association between each addr, and the + * sizes[], types[], and bufs[] values at the same index. + * + * Do this by allocating an array of struct H5FD_vsrt_tmp_t, where + * each instance of H5FD_vsrt_tmp_t has two fields, addr and index. + * Load the array with the contents of the addrs array and + * the index of the associated entry. Sort the array, allocate + * the s_types_ptr, s_addrs_ptr, s_sizes_ptr, and s_bufs_ptr + * arrays and populate them using the mapping provided by + * the sorted array of H5FD_vsrt_tmp_t. + */ + int j; + int fixed_size_index = (int)count; + int fixed_type_index = (int)count; + size_t srt_tmp_size; + + srt_tmp_size = ((size_t)count * sizeof(struct H5FD_vsrt_tmp_t)); + + if (NULL == (srt_tmp = (H5FD_vsrt_tmp_t *)HDmalloc(srt_tmp_size))) + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't alloc srt_tmp") + + for (i = 0; i < (int)count; i++) { + + srt_tmp[i].addr = addrs[i]; + srt_tmp[i].index = i; + } + + /* sort the srt_tmp array */ + HDqsort(srt_tmp, (size_t)count, sizeof(struct H5FD_vsrt_tmp_t), H5FD__vsrt_tmp_cmp); + + /* verify no duplicate entries */ + i = 1; + + while (i < (int)(count - 1)) { + + HDassert(H5F_addr_lt(srt_tmp[i - 1].addr, srt_tmp[i].addr)); + + if (H5F_addr_eq(addrs[i - 1], addrs[i])) { + + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "duplicate addr in vector") + } + i++; + } + + if ((NULL == (*s_types_ptr = (H5FD_mem_t *)HDmalloc((size_t)count * sizeof(H5FD_mem_t)))) || + (NULL == (*s_addrs_ptr = (haddr_t *)HDmalloc((size_t)count * sizeof(haddr_t)))) || + (NULL == (*s_sizes_ptr = (size_t *)HDmalloc((size_t)count * sizeof(size_t)))) || + (NULL == (*s_bufs_ptr = (void *)HDmalloc((size_t)count * sizeof(void *))))) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't alloc sorted vector(s)") + } + + HDassert(sizes[0] != 0); + HDassert(types[0] != H5FD_MEM_NOLIST); + + /* scan the sizes and types vectors to determine if the fixed size / type + * optimization is in use, and if so, to determine the index of the last + * valid value on each vector. + */ + i = 0; + while ((i < (int)count) && ((fixed_size_index == (int)count) || (fixed_type_index == (int)count))) { + + if ((fixed_size_index == (int)count) && (sizes[i] == 0)) { + + fixed_size_index = i - 1; + } + + if ((fixed_type_index == (int)count) && (types[i] == H5FD_MEM_NOLIST)) { + + fixed_type_index = i - 1; + } + + i++; + } + + HDassert((fixed_size_index >= 0) && (fixed_size_index <= (int)count)); + HDassert((fixed_type_index >= 0) && (fixed_size_index <= (int)count)); + + /* populate the sorted vectors */ + for (i = 0; i < (int)count; i++) { + + j = srt_tmp[i].index; + + (*s_types_ptr)[j] = types[MIN(i, fixed_type_index)]; + (*s_addrs_ptr)[j] = addrs[i]; + (*s_sizes_ptr)[j] = sizes[MIN(i, fixed_size_index)]; + (*s_bufs_ptr)[j] = bufs[i]; + } + } + +done: + if (srt_tmp) { + + HDfree(srt_tmp); + srt_tmp = NULL; + } + + /* On failure, free the sorted vectors if they were allocated. + * Note that we only allocate these vectors if the original array + * was not sorted -- thus we check both for failure, and for + * the flag indicating that the original vector was not sorted + * in increasing address order. + */ + if ((ret_value != SUCCEED) && (!(*vector_was_sorted))) { + + /* free space allocated for sorted vectors */ + if (*s_types_ptr) { + + HDfree(*s_types_ptr); + *s_types_ptr = NULL; + } + + if (*s_addrs_ptr) { + + HDfree(*s_addrs_ptr); + *s_addrs_ptr = NULL; + } + + if (*s_sizes_ptr) { + + HDfree(*s_sizes_ptr); + *s_sizes_ptr = NULL; + } + + if (*s_bufs_ptr) { + + HDfree(*s_bufs_ptr); + *s_bufs_ptr = NULL; + } + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_sort_vector_io_req() */ + +/*------------------------------------------------------------------------- * Function: H5FD_delete * * Purpose: Private version of H5FDdelete() @@ -402,6 +2329,7 @@ H5FD_delete(const char *filename, hid_t fapl_id) FUNC_ENTER_NOAPI(FAIL) /* Sanity checks */ + HDassert(filename); /* Get file access property list */ diff --git a/src/H5FDlog.c b/src/H5FDlog.c index 87871ab..0364305 100644 --- a/src/H5FDlog.c +++ b/src/H5FDlog.c @@ -207,6 +207,10 @@ static const H5FD_class_t H5FD_log_g = { H5FD__log_get_handle, /* get_handle */ H5FD__log_read, /* read */ H5FD__log_write, /* write */ + NULL, /* read vector */ + NULL, /* write vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__log_truncate, /* truncate */ H5FD__log_lock, /* lock */ diff --git a/src/H5FDmirror.c b/src/H5FDmirror.c index d539f4d..cf3d9ca 100644 --- a/src/H5FDmirror.c +++ b/src/H5FDmirror.c @@ -187,6 +187,10 @@ static const H5FD_class_t H5FD_mirror_g = { NULL, /* get_handle */ H5FD__mirror_read, /* read */ H5FD__mirror_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__mirror_truncate, /* truncate */ H5FD__mirror_lock, /* lock */ diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 7c85897..445cc65 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -84,6 +84,11 @@ static herr_t H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, ha void *buf); static herr_t H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t size, const void *buf); +static herr_t H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t count, + H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], void *bufs[]); +static herr_t H5FD__mpio_write_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t count, + H5FD_mem_t types[], haddr_t addrs[], size_t sizes[], + const void *bufs[]); static herr_t H5FD__mpio_flush(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); static herr_t H5FD__mpio_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing); static herr_t H5FD__mpio_delete(const char *filename, hid_t fapl_id); @@ -119,6 +124,10 @@ static const H5FD_class_t H5FD_mpio_g = { H5FD__mpio_get_handle, /* get_handle */ H5FD__mpio_read, /* read */ H5FD__mpio_write, /* write */ + H5FD__mpio_read_vector, /*read_vector */ + H5FD__mpio_write_vector, /*write_vector */ + NULL, /*read_selection */ + NULL, /*write_selection */ H5FD__mpio_flush, /* flush */ H5FD__mpio_truncate, /* truncate */ NULL, /* lock */ @@ -1584,6 +1593,989 @@ done: } /* end H5FD__mpio_write() */ /*------------------------------------------------------------------------- + * Function: H5FD__mpio_read_vector() + * + * Purpose: The behaviour of this function dependes on the value of + * the io_xfer_mode obtained from the context. + * + * If it is H5FD_MPIO_COLLECTIVE, this is a collective + * operation, which allows us to use MPI_File_set_view, and + * then perform the entire vector read in a single MPI call. + * + * Do this (if count is positive), by constructing memory + * and file derived types from the supplied vector, using + * file type to set the file view, and then reading the + * the memory type from file. Note that this read is + * either independent or collective depending on the + * value of mpio_coll_opt -- again obtained from the context. + * + * If count is zero, participate in the collective read + * (if so configured) with an empty read. + * + * Finally, set the file view back to its default state. + * + * In contrast, if io_xfer_mode is H5FD_MPIO_INDEPENDENT, + * this call is independent, and thus we cannot use + * MPI_File_set_view(). + * + * In this case, simply walk the vector, and issue an + * independent read for each entry. + * + * WARNING: At present, this function makes no provision + * entries of size greater than 2 GB in the vector. This + * will have to be fixed before release. + * + * Return: Success: SUCCEED. + * Failure: FAIL. + * + * Programmer: John Mainzer + * March 15, 2021 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__mpio_read_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t count, H5FD_mem_t types[], + haddr_t addrs[], size_t sizes[], void *bufs[]) +{ + H5FD_mpio_t * file = (H5FD_mpio_t *)_file; + hbool_t vector_was_sorted = TRUE; + hbool_t fixed_size = FALSE; + size_t size; + H5FD_mem_t * s_types = NULL; + haddr_t * s_addrs = NULL; + size_t * s_sizes = NULL; + void ** s_bufs = NULL; + int * mpi_block_lengths = NULL; + char unused = 0; /* Unused, except for non-NULL pointer value */ + void * mpi_bufs_base = NULL; + MPI_Aint mpi_bufs_base_Aint; + MPI_Aint * mpi_bufs = NULL; + MPI_Aint * mpi_displacments = NULL; + MPI_Datatype buf_type = MPI_BYTE; /* MPI description of the selection in memory */ + hbool_t buf_type_created = FALSE; + MPI_Datatype file_type = MPI_BYTE; /* MPI description of the selection in file */ + hbool_t file_type_created = FALSE; + int i; + int j; + int mpi_code; /* MPI return code */ + MPI_Offset mpi_off = 0; + MPI_Status mpi_stat; /* Status from I/O operation */ + H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */ + H5FD_mpio_collective_opt_t coll_opt_mode; /* whether we are doing collective or independent I/O */ + int size_i; +#if MPI_VERSION >= 3 + MPI_Count bytes_read = 0; /* Number of bytes read in */ + MPI_Count type_size; /* MPI datatype used for I/O's size */ + MPI_Count io_size; /* Actual number of bytes requested */ + MPI_Count n; +#else + int bytes_read = 0; /* Number of bytes read in */ + int type_size; /* MPI datatype used for I/O's size */ + int io_size; /* Actual number of bytes requested */ + int n; +#endif + hbool_t rank0_bcast = FALSE; /* If read-with-rank0-and-bcast flag was used */ +#ifdef H5FDmpio_DEBUG + hbool_t H5FD_mpio_debug_t_flag = (H5FD_mpio_debug_flags_s[(int)'t'] && H5FD_MPIO_TRACE_THIS_RANK(file)); + hbool_t H5FD_mpio_debug_r_flag = (H5FD_mpio_debug_flags_s[(int)'r'] && H5FD_MPIO_TRACE_THIS_RANK(file)); +#endif + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + HDfprintf(stderr, "%s: (%d) Entering\n", __func__, file->mpi_rank); +#endif + + /* Sanity checks */ + HDassert(file); + HDassert(H5FD_MPIO == file->pub.driver_id); + HDassert((types) || (count == 0)); + HDassert((addrs) || (count == 0)); + HDassert((sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* verify that the first elements of the sizes and types arrays are + * valid. + */ + HDassert((count == 0) || (sizes[0] != 0)); + HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST)); + + /* Get the transfer mode from the API context + * + * This flag is set to H5FD_MPIO_COLLECTIVE if the API call is + * collective, and to H5FD_MPIO_INDEPENDENT if it is not. + * + * While this doesn't mean that we are actually about to do a collective + * read, it does mean that all ranks are here, so we can use MPI_File_set_view(). + */ + if (H5CX_get_io_xfer_mode(&xfer_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + + if (xfer_mode == H5FD_MPIO_COLLECTIVE) { + + if (count == 1) { + /* Single block. Just use a series of MPI_BYTEs for the file view. + */ + size_i = (int)sizes[0]; + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + mpi_bufs_base = bufs[0]; + + /* Setup s_sizes (needed for incomplete read filling code) */ + vector_was_sorted = TRUE; + s_sizes = sizes; + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff(addrs[0], &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI offset") + } + else if (count > 0) { /* create MPI derived types describing the vector write */ + + /* sort the vector I/O request into increasing address order if required + * + * If the vector is already sorted, the base addresses of types, addrs, sizes, + * and bufs will be returned in s_types, s_addrs, s_sizes, and s_bufs respectively. + * + * If the vector was not already sorted, new, sorted versions of types, addrs, sizes, and bufs + * are allocated, populated, and returned in s_types, s_addrs, s_sizes, and s_bufs respectively. + * In this case, this function must free the memory allocated for the sorted vectors. + */ + if (H5FD_sort_vector_io_req(&vector_was_sorted, count, types, addrs, sizes, bufs, &s_types, + &s_addrs, &s_sizes, &s_bufs) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "can't sort vector I/O request") + + if ((NULL == (mpi_block_lengths = (int *)HDmalloc((size_t)count * sizeof(int)))) || + (NULL == (mpi_displacments = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint)))) || + (NULL == (mpi_bufs = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint))))) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't alloc mpi block lengths / displacement") + } + + /* when we setup mpi_bufs[] below, all addresses are offsets from + * mpi_bufs_base. + * + * Since these offsets must all be positive, we must scan through + * s_bufs[] to find the smallest value, and choose that for + * mpi_bufs_base. + */ + + j = 0; /* guess at the index of the smallest value of s_bufs[] */ + + for (i = 1; i < (int)count; i++) { + + if (s_bufs[i] < s_bufs[j]) { + + j = i; + } + } + + mpi_bufs_base = s_bufs[j]; + + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(mpi_bufs_base, &mpi_bufs_base_Aint))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address for s_bufs[] to mpi_bufs_base failed", mpi_code) + + size_i = 1; + + fixed_size = FALSE; + + /* load the mpi_block_lengths and mpi_displacements arrays */ + for (i = 0; i < (int)count; i++) { + + if (!fixed_size) { + + if (sizes[i] == 0) { + + fixed_size = TRUE; + size = sizes[i - 1]; + } + else { + + size = s_sizes[i]; + } + } + + /* There is an obvious possibility of an overflow here, as size_t + * will typically be 64 bits, where as int will typically be 32 bits. + * This must be fixed, but it should be good enough for initial + * correctness testing. + * JRM -- 3/17/21 + */ + mpi_block_lengths[i] = (int)size; + mpi_displacments[i] = (MPI_Aint)s_addrs[i]; + + /* convert s_bufs[i] to MPI_Aint... */ + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(s_bufs[i], &(mpi_bufs[i])))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address for s_bufs[] - mpi_bufs_base failed", mpi_code) + + /*... and then subtract mpi_bufs_base_Aint from it. */ +#if ((MPI_VERSION > 3) || ((MPI_VERSION == 3) && (MPI_SUBVERSION >= 1))) + mpi_bufs[i] = MPI_Aint_diff(mpi_bufs[i], mpi_bufs_base_Aint); +#else + mpi_bufs[i] = mpi_bufs[i] - mpi_bufs_base_Aint; +#endif + } + + /* create the memory MPI derived types */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)count, mpi_block_lengths, mpi_bufs, + MPI_BYTE, &buf_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed for buf_type failed", mpi_code) + + buf_type_created = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&buf_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit for buf_type failed", mpi_code) + + /* create the file MPI derived type */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)count, mpi_block_lengths, + mpi_displacments, MPI_BYTE, &file_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed for file_type failed", mpi_code) + + file_type_created = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit for file_type failed", mpi_code) + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0") + } + else { + + /* setup for null participation in the collective operation. */ + + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + + /* Set non-NULL pointer for I/O operation */ + mpi_bufs_base = (void *)(&unused); + + /* MPI count to read */ + size_i = 0; + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0") + } + + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat, 0, sizeof(mpi_stat)); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: mpi_off = %ld size_i = %d\n", __func__, (long)mpi_off, size_i); +#endif + + /* Setup the file view. */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* Reset mpi_off to 0 since the view now starts at the data offset */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0") + + /* Get the collective_opt property to check whether the application wants to do IO individually. + */ + if (H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0) + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property") + + /* Read the data. */ +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: using MPIO collective mode\n", __func__); +#endif + if (coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: doing MPI collective IO\n", __func__); +#endif + /* Check whether we should read from rank 0 and broadcast to other ranks */ + if (H5CX_get_mpio_rank0_bcast()) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: doing read-rank0-and-MPI_Bcast\n", __func__); +#endif + /* Indicate path we've taken */ + rank0_bcast = TRUE; + + /* Read on rank 0 Bcast to other ranks */ + if (file->mpi_rank == 0) + if (MPI_SUCCESS != (mpi_code = MPI_File_read_at(file->f, mpi_off, mpi_bufs_base, size_i, + buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) + if (MPI_SUCCESS != (mpi_code = MPI_Bcast(mpi_bufs_base, size_i, buf_type, 0, file->comm))) + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + } /* end if */ + else if (MPI_SUCCESS != (mpi_code = MPI_File_read_at_all(file->f, mpi_off, mpi_bufs_base, size_i, + buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code) + } /* end if */ + else if (size_i > 0) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: doing MPI independent IO\n", __func__); +#endif + + if (MPI_SUCCESS != + (mpi_code = MPI_File_read_at(file->f, mpi_off, mpi_bufs_base, size_i, buf_type, &mpi_stat))) + + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) + + } /* end else */ + + /* Reset the file view */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* Only retrieve bytes read if this rank _actually_ participated in I/O */ + if (!rank0_bcast || (rank0_bcast && file->mpi_rank == 0)) { + /* How many bytes were actually read? */ +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, buf_type, &bytes_read))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) + } /* end if */ + + /* If the rank0-bcast feature was used, broadcast the # of bytes read to + * other ranks, which didn't perform any I/O. + */ + /* NOTE: This could be optimized further to be combined with the broadcast + * of the data. (QAK - 2019/1/2) + * Or have rank 0 clear the unread parts of the buffer prior to + * the bcast. (NAF - 2021/9/15) + */ + if (rank0_bcast) +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_COUNT, 0, file->comm)) +#else + if (MPI_SUCCESS != MPI_Bcast(&bytes_read, 1, MPI_INT, 0, file->comm)) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", 0) + + /* Get the type's size */ +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Type_size_x(buf_type, &type_size))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Type_size(buf_type, &type_size))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code) + + /* Compute the actual number of bytes requested */ + io_size = type_size * size_i; + + /* Check for read failure */ + if (bytes_read < 0 || bytes_read > io_size) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed") + + /* Check for incomplete read */ + n = io_size - bytes_read; + if (n > 0) { + i = (int)count - 1; + + /* Iterate over sorted array in reverse, filling in zeroes to + * sections of the buffers that were not read to */ + do { + HDassert(i >= 0); + +#if MPI_VERSION >= 3 + io_size = MIN(n, (MPI_Count)s_sizes[i]); + bytes_read = (MPI_Count)s_sizes[i] - io_size; +#else + io_size = MIN(n, (int)s_sizes[i]); + bytes_read = (int)s_sizes[i] - io_size; +#endif + HDassert(bytes_read >= 0); + + HDmemset((char *)bufs[i] + bytes_read, 0, (size_t)io_size); + + n -= io_size; + i--; + } while (n > 0); + } + } + else if (count > 0) { + + haddr_t max_addr = HADDR_MAX; + + /* The read is part of an independent operation. As a result, + * we can't use MPI_File_set_view() (since it it a collective operation), + * and thus there is no point in setting up an MPI derived type, as + * (to the best of my knowlege) MPI I/O doesn't have support for + * non-contiguous I/O in independent mode. + * + * Thus we have to read in each element of the vector in a separate + * MPI_File_read_at() call. + */ + + fixed_size = FALSE; + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_r_flag) + HDfprintf(stdout, "%s: doing MPI independent IO\n", __func__); +#endif + + for (i = 0; i < (int)count; i++) { + + if (H5FD_mpi_haddr_to_MPIOff(addrs[i], &mpi_off) < 0) + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") + + if (!fixed_size) { + + if (sizes[i] == 0) { + + fixed_size = TRUE; + size = sizes[i - 1]; + } + else { + + size = sizes[i]; + } + } + + size_i = (int)size; /* todo: fix potential for overflow */ + + /* Check if we acutally need to do I/O */ + if (addrs[i] < max_addr) { + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat, 0, sizeof(mpi_stat)); + + /* Issue read */ + if (MPI_SUCCESS != + (mpi_code = MPI_File_read_at(file->f, mpi_off, bufs[i], size_i, MPI_BYTE, &mpi_stat))) + + HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code) + + /* How many bytes were actually read? */ +#if MPI_VERSION >= 3 + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&mpi_stat, MPI_BYTE, &bytes_read))) +#else + if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) +#endif + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) + + /* Check for read failure */ + if (bytes_read < 0 || bytes_read > size_i) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed") + + /* + * If we didn't read the entire I/O, fill in zeroes beyond end of + * the physical MPI file and don't issue any more reads at higher + * addresses. + */ + if ((n = (size_i - bytes_read)) > 0) { + HDmemset((char *)bufs[i] + bytes_read, 0, (size_t)n); + max_addr = addrs[i] + (haddr_t)bytes_read; + } + } + else { + /* Read is past the max address, fill in zeroes */ + HDmemset((char *)bufs[i], 0, size); + } + } + } + +done: + + if (!vector_was_sorted) { /* free sorted vectors if they exist */ + + if (s_types) { + + HDfree(s_types); + s_types = NULL; + } + + if (s_addrs) { + + HDfree(s_addrs); + s_addrs = NULL; + } + + if (s_sizes) { + + HDfree(s_sizes); + s_sizes = NULL; + } + + if (s_bufs) { + + HDfree(s_bufs); + s_bufs = NULL; + } + } + + if (mpi_block_lengths) { + + HDfree(mpi_block_lengths); + mpi_block_lengths = NULL; + } + + if (mpi_displacments) { + + HDfree(mpi_displacments); + mpi_displacments = NULL; + } + + if (mpi_bufs) { + + HDfree(mpi_bufs); + mpi_bufs = NULL; + } + + if (buf_type_created) { + MPI_Type_free(&buf_type); + } + + if (file_type_created) { + MPI_Type_free(&file_type); + } + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", __func__, file->mpi_rank, ret_value); +#endif + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD__mpio_read_vector() */ + +/*------------------------------------------------------------------------- + * Function: H5FD__mpio_write_vector + * + * Purpose: The behaviour of this function dependes on the value of + * the io_xfer_mode obtained from the context. + * + * If it is H5FD_MPIO_COLLECTIVE, this is a collective + * operation, which allows us to use MPI_File_set_view, and + * then perform the entire vector write in a single MPI call. + * + * Do this (if count is positive), by constructing memory + * and file derived types from the supplied vector, using + * file type to set the file view, and then writing the + * the memory type to file. Note that this write is + * either independent or collective depending on the + * value of mpio_coll_opt -- again obtained from the context. + * + * If count is zero, participate in the collective write + * (if so configured) with an empty write. + * + * Finally, set the file view back to its default state. + * + * In contrast, if io_xfer_mode is H5FD_MPIO_INDEPENDENT, + * this call is independent, and thus we cannot use + * MPI_File_set_view(). + * + * In this case, simply walk the vector, and issue an + * independent write for each entry. + * + * WARNING: At present, this function makes no provision + * entries of size greater than 2 GB in the vector. This + * will have to be fixed before release. + * + * Return: Success: SUCCEED. + * Failure: FAIL. + * + * Programmer: John Mainzer + * March 15, 2021 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__mpio_write_vector(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, uint32_t count, H5FD_mem_t types[], + haddr_t addrs[], size_t sizes[], const void *bufs[]) +{ + H5FD_mpio_t * file = (H5FD_mpio_t *)_file; + hbool_t vector_was_sorted = TRUE; + hbool_t fixed_size = FALSE; + size_t size; + H5FD_mem_t * s_types = NULL; + haddr_t * s_addrs = NULL; + size_t * s_sizes = NULL; + void ** s_bufs = NULL; + int * mpi_block_lengths = NULL; + char unused = 0; /* Unused, except for non-NULL pointer value */ + void * mpi_bufs_base = NULL; + MPI_Aint mpi_bufs_base_Aint; + MPI_Aint * mpi_bufs = NULL; + MPI_Aint * mpi_displacments = NULL; + MPI_Datatype buf_type = MPI_BYTE; /* MPI description of the selection in memory */ + hbool_t buf_type_created = FALSE; + MPI_Datatype file_type = MPI_BYTE; /* MPI description of the selection in file */ + hbool_t file_type_created = FALSE; + int i; + int j; + int mpi_code; /* MPI return code */ + MPI_Offset mpi_off = 0; + MPI_Status mpi_stat; /* Status from I/O operation */ + H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */ + H5FD_mpio_collective_opt_t coll_opt_mode; /* whether we are doing collective or independent I/O */ + int size_i; +#ifdef H5FDmpio_DEBUG + hbool_t H5FD_mpio_debug_t_flag = (H5FD_mpio_debug_flags_s[(int)'t'] && H5FD_MPIO_TRACE_THIS_RANK(file)); + hbool_t H5FD_mpio_debug_w_flag = (H5FD_mpio_debug_flags_s[(int)'w'] && H5FD_MPIO_TRACE_THIS_RANK(file)); +#endif + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + HDfprintf(stderr, "%s: (%d) Entering\n", __func__, file->mpi_rank); +#endif + + /* Sanity checks */ + HDassert(file); + HDassert(H5FD_MPIO == file->pub.driver_id); + HDassert((types) || (count == 0)); + HDassert((addrs) || (count == 0)); + HDassert((sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* verify that the first elements of the sizes and types arrays are + * valid. + */ + HDassert((count == 0) || (sizes[0] != 0)); + HDassert((count == 0) || (types[0] != H5FD_MEM_NOLIST)); + + /* Verify that no data is written when between MPI_Barrier()s during file flush */ + + HDassert(!H5CX_get_mpi_file_flushing()); + + /* sort the vector I/O request into increasing address order if required + * + * If the vector is already sorted, the base addresses of types, addrs, sizes, + * and bufs will be returned in s_types, s_addrs, s_sizes, and s_bufs respectively. + * + * If the vector was not already sorted, new, sorted versions of types, addrs, sizes, and bufs + * are allocated, populated, and returned in s_types, s_addrs, s_sizes, and s_bufs respectively. + * In this case, this function must free the memory allocated for the sorted vectors. + */ + if (H5FD_sort_vector_io_req(&vector_was_sorted, count, types, addrs, sizes, bufs, &s_types, &s_addrs, + &s_sizes, &s_bufs) < 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "can't sort vector I/O request") + + /* Get the transfer mode from the API context + * + * This flag is set to H5FD_MPIO_COLLECTIVE if the API call is + * collective, and to H5FD_MPIO_INDEPENDENT if it is not. + * + * While this doesn't mean that we are actually about to do a collective + * write, it does mean that all ranks are here, so we can use MPI_File_set_view(). + */ + if (H5CX_get_io_xfer_mode(&xfer_mode) < 0) + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode") + + if (xfer_mode == H5FD_MPIO_COLLECTIVE) { + + if (count > 0) { /* create MPI derived types describing the vector write */ + + if ((NULL == (mpi_block_lengths = (int *)HDmalloc((size_t)count * sizeof(int)))) || + (NULL == (mpi_displacments = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint)))) || + (NULL == (mpi_bufs = (MPI_Aint *)HDmalloc((size_t)count * sizeof(MPI_Aint))))) { + + HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't alloc mpi block lengths / displacement") + } + + /* when we setup mpi_bufs[] below, all addresses are offsets from + * mpi_bufs_base. + * + * Since these offsets must all be positive, we must scan through + * s_bufs[] to find the smallest value, and choose that for + * mpi_bufs_base. + */ + + j = 0; /* guess at the index of the smallest value of s_bufs[] */ + + for (i = 1; i < (int)count; i++) { + + if (s_bufs[i] < s_bufs[j]) { + + j = i; + } + } + + mpi_bufs_base = s_bufs[j]; + + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(mpi_bufs_base, &mpi_bufs_base_Aint))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address for s_bufs[] to mpi_bufs_base failed", mpi_code) + + size_i = 1; + + fixed_size = FALSE; + + /* load the mpi_block_lengths and mpi_displacements arrays */ + for (i = 0; i < (int)count; i++) { + + if (!fixed_size) { + + if (sizes[i] == 0) { + + fixed_size = TRUE; + size = sizes[i - 1]; + } + else { + + size = s_sizes[i]; + } + } + + /* There is an obvious possibility of an overflow here, as size_t + * will typically be 64 bits, where as int will typically be 32 bits. + * This must be fixed, but it should be good enough for initial + * correctness testing. + * JRM -- 3/17/21 + */ + mpi_block_lengths[i] = (int)size; + mpi_displacments[i] = (MPI_Aint)s_addrs[i]; + + /* convert s_bufs[i] to MPI_Aint... */ + if (MPI_SUCCESS != (mpi_code = MPI_Get_address(s_bufs[i], &(mpi_bufs[i])))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Get_address for s_bufs[] - mpi_bufs_base failed", mpi_code) + + /*... and then subtract mpi_bufs_base_Aint from it. */ +#if ((MPI_VERSION > 3) || ((MPI_VERSION == 3) && (MPI_SUBVERSION >= 1))) + mpi_bufs[i] = MPI_Aint_diff(mpi_bufs[i], mpi_bufs_base_Aint); +#else + mpi_bufs[i] = mpi_bufs[i] - mpi_bufs_base_Aint; +#endif + } + + /* create the memory MPI derived types */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)count, mpi_block_lengths, mpi_bufs, + MPI_BYTE, &buf_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed for buf_type failed", mpi_code) + + buf_type_created = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&buf_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit for buf_type failed", mpi_code) + + /* create the file MPI derived type */ + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)count, mpi_block_lengths, + mpi_displacments, MPI_BYTE, &file_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed for file_type failed", mpi_code) + + file_type_created = TRUE; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type))) + + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit for file_type failed", mpi_code) + } + else { + + /* setup for null participation in the collective operation. */ + + buf_type = MPI_BYTE; + file_type = MPI_BYTE; + + /* Set non-NULL pointer for I/O operation */ + mpi_bufs_base = (void *)(&unused); + + /* MPI count to write */ + size_i = 0; + } + + /* Portably initialize MPI status variable */ + HDmemset(&mpi_stat, 0, sizeof(MPI_Status)); + + /* some numeric conversions */ + if (H5FD_mpi_haddr_to_MPIOff((haddr_t)0, &mpi_off) < 0) + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't set MPI off to 0") + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + HDfprintf(stdout, "%s: mpi_off = %ld size_i = %d\n", __func__, (long)mpi_off, size_i); +#endif + + /* Setup the file view. */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, mpi_off, MPI_BYTE, file_type, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + + /* Get the collective_opt property to check whether the application wants to do IO individually. + */ + if (H5CX_get_mpio_coll_opt(&coll_opt_mode) < 0) + + HGOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_op property") + + /* Write the data. */ +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + HDfprintf(stdout, "%s: using MPIO collective mode\n", __func__); +#endif + + if (coll_opt_mode == H5FD_MPIO_COLLECTIVE_IO) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + HDfprintf(stdout, "%s: doing MPI collective IO\n", __func__); +#endif + + if (MPI_SUCCESS != (mpi_code = MPI_File_write_at_all(file->f, mpi_off, mpi_bufs_base, size_i, + buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code) + } /* end if */ + else if (size_i > 0) { +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + HDfprintf(stdout, "%s: doing MPI independent IO\n", __func__); +#endif + + if (MPI_SUCCESS != + (mpi_code = MPI_File_write_at(file->f, mpi_off, mpi_bufs_base, size_i, buf_type, &mpi_stat))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) + } /* end else */ + + /* Reset the file view */ + if (MPI_SUCCESS != (mpi_code = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpi_native_g, file->info))) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code) + } + else if (count > 0) { + + /* The write is part of an independent operation. As a result, + * we can't use MPI_File_set_view() (since it it a collective operation), + * and thus there is no point in setting up an MPI derived type, as + * (to the best of my knowlege) MPI I/O doesn't have support for + * non-contiguous I/O in independent mode. + * + * Thus we have to write out each element of the vector in a separate + * MPI_File_write_at() call. + */ + + fixed_size = FALSE; + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_w_flag) + HDfprintf(stdout, "%s: doing MPI independent IO\n", __func__); +#endif + + for (i = 0; i < (int)count; i++) { + + if (H5FD_mpi_haddr_to_MPIOff(s_addrs[i], &mpi_off) < 0) + + HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off") + + if (!fixed_size) { + + if (sizes[i] == 0) { + + fixed_size = TRUE; + size = sizes[i - 1]; + } + else { + + size = s_sizes[i]; + } + } + + size_i = (int)size; /* todo: fix potential for overflow */ + + if (MPI_SUCCESS != + (mpi_code = MPI_File_write_at(file->f, mpi_off, s_bufs[i], size_i, MPI_BYTE, &mpi_stat))) + + HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code) + } + } + + /* Each process will keep track of its perceived EOF value locally, and + * ultimately we will reduce this value to the maximum amongst all + * processes, but until then keep the actual eof at HADDR_UNDEF just in + * case something bad happens before that point. (rather have a value + * we know is wrong sitting around rather than one that could only + * potentially be wrong.) + */ + file->eof = HADDR_UNDEF; + + /* check to see if the local eof has changed been extended, and update if so. + * Since the vector write request has been sorted in increasing address order, + * we need only look at the address and size of the last element in the vector. + */ + if ((count > 0) && ((s_addrs[count - 1] + (haddr_t)(s_sizes[count - 1])) > file->local_eof)) { + + file->local_eof = (s_addrs[count - 1] + (haddr_t)(s_sizes[count - 1])); + } + +done: + + if (!vector_was_sorted) { /* free sorted vectors if they exist */ + + if (s_types) { + + HDfree(s_types); + s_types = NULL; + } + + if (s_addrs) { + + HDfree(s_addrs); + s_addrs = NULL; + } + + if (s_sizes) { + + HDfree(s_sizes); + s_sizes = NULL; + } + + if (s_bufs) { + + HDfree(s_bufs); + s_bufs = NULL; + } + } + + if (mpi_block_lengths) { + + HDfree(mpi_block_lengths); + mpi_block_lengths = NULL; + } + + if (mpi_displacments) { + + HDfree(mpi_displacments); + mpi_displacments = NULL; + } + + if (mpi_bufs) { + + HDfree(mpi_bufs); + mpi_bufs = NULL; + } + + if (buf_type_created) { + MPI_Type_free(&buf_type); + } + + if (file_type_created) { + MPI_Type_free(&file_type); + } + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_debug_t_flag) + HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", __func__, file->mpi_rank, ret_value); +#endif + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD__mpio_write_vector() */ + +/*------------------------------------------------------------------------- * Function: H5FD__mpio_flush * * Purpose: Makes sure that all data is on disk. This is collective. diff --git a/src/H5FDmulti.c b/src/H5FDmulti.c index 0d1967d..e51d101 100644 --- a/src/H5FDmulti.c +++ b/src/H5FDmulti.c @@ -198,6 +198,10 @@ static const H5FD_class_t H5FD_multi_g = { H5FD_multi_get_handle, /* get_handle */ H5FD_multi_read, /* read */ H5FD_multi_write, /* write */ + NULL, /*read_vector */ + NULL, /*write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ H5FD_multi_flush, /* flush */ H5FD_multi_truncate, /* truncate */ H5FD_multi_lock, /* lock */ diff --git a/src/H5FDprivate.h b/src/H5FDprivate.h index 6dbd483..a13e7af 100644 --- a/src/H5FDprivate.h +++ b/src/H5FDprivate.h @@ -24,6 +24,7 @@ /* Private headers needed by this file */ #include "H5Pprivate.h" /* Property lists */ +#include "H5Sprivate.h" /* Dataspaces */ /* * The MPI drivers are needed because there are @@ -125,6 +126,22 @@ H5_DLL herr_t H5FD_set_feature_flags(H5FD_t *file, unsigned long feature_flags) H5_DLL herr_t H5FD_get_fs_type_map(const H5FD_t *file, H5FD_mem_t *type_map); H5_DLL herr_t H5FD_read(H5FD_t *file, H5FD_mem_t type, haddr_t addr, size_t size, void *buf /*out*/); H5_DLL herr_t H5FD_write(H5FD_t *file, H5FD_mem_t type, haddr_t addr, size_t size, const void *buf); +H5_DLL herr_t H5FD_read_vector(H5FD_t *file, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], void *bufs[] /* out */); +H5_DLL herr_t H5FD_write_vector(H5FD_t *file, uint32_t count, H5FD_mem_t types[], haddr_t addrs[], + size_t sizes[], const void *bufs[] /* out */); +H5_DLL herr_t H5FD_read_selection(H5FD_t *file, H5FD_mem_t type, uint32_t count, + const H5S_t *const *mem_spaces, const H5S_t *const *file_spaces, + haddr_t offsets[], size_t element_sizes[], void *bufs[] /* out */); +H5_DLL herr_t H5FD_write_selection(H5FD_t *file, H5FD_mem_t type, uint32_t count, + const H5S_t *const *mem_spaces, const H5S_t *const *file_spaces, + haddr_t offsets[], size_t element_sizes[], const void *bufs[]); +H5_DLL herr_t H5FD_read_selection_id(H5FD_t *file, H5FD_mem_t type, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], + void *bufs[] /* out */); +H5_DLL herr_t H5FD_write_selection_id(H5FD_t *file, H5FD_mem_t type, uint32_t count, hid_t mem_space_ids[], + hid_t file_space_ids[], haddr_t offsets[], size_t element_sizes[], + const void *bufs[]); H5_DLL herr_t H5FD_flush(H5FD_t *file, hbool_t closing); H5_DLL herr_t H5FD_truncate(H5FD_t *file, hbool_t closing); H5_DLL herr_t H5FD_lock(H5FD_t *file, hbool_t rw); @@ -137,6 +154,11 @@ H5_DLL herr_t H5FD_set_base_addr(H5FD_t *file, haddr_t base_addr); H5_DLL haddr_t H5FD_get_base_addr(const H5FD_t *file); H5_DLL herr_t H5FD_set_paged_aggr(H5FD_t *file, hbool_t paged); +H5_DLL herr_t H5FD_sort_vector_io_req(hbool_t *vector_was_sorted, uint32_t count, H5FD_mem_t types[], + haddr_t addrs[], size_t sizes[], const void *bufs[], + H5FD_mem_t **s_types_ptr, haddr_t **s_addrs_ptr, size_t **s_sizes_ptr, + void ***s_bufs_ptr); + /* Function prototypes for MPI based VFDs*/ #ifdef H5_HAVE_PARALLEL /* General routines */ diff --git a/src/H5FDros3.c b/src/H5FDros3.c index a32d65e..0dd8cc3 100644 --- a/src/H5FDros3.c +++ b/src/H5FDros3.c @@ -264,6 +264,10 @@ static const H5FD_class_t H5FD_ros3_g = { H5FD__ros3_get_handle, /* get_handle */ H5FD__ros3_read, /* read */ H5FD__ros3_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__ros3_truncate, /* truncate */ NULL, /* lock */ diff --git a/src/H5FDsec2.c b/src/H5FDsec2.c index 15103da..be59102 100644 --- a/src/H5FDsec2.c +++ b/src/H5FDsec2.c @@ -170,6 +170,10 @@ static const H5FD_class_t H5FD_sec2_g = { H5FD__sec2_get_handle, /* get_handle */ H5FD__sec2_read, /* read */ H5FD__sec2_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ NULL, /* flush */ H5FD__sec2_truncate, /* truncate */ H5FD__sec2_lock, /* lock */ diff --git a/src/H5FDsplitter.c b/src/H5FDsplitter.c index 3113e8b..b178b5b 100644 --- a/src/H5FDsplitter.c +++ b/src/H5FDsplitter.c @@ -160,6 +160,10 @@ static const H5FD_class_t H5FD_splitter_g = { H5FD__splitter_get_handle, /* get_handle */ H5FD__splitter_read, /* read */ H5FD__splitter_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ H5FD__splitter_flush, /* flush */ H5FD__splitter_truncate, /* truncate */ H5FD__splitter_lock, /* lock */ diff --git a/src/H5FDstdio.c b/src/H5FDstdio.c index 312263c..f9cf350 100644 --- a/src/H5FDstdio.c +++ b/src/H5FDstdio.c @@ -210,6 +210,10 @@ static const H5FD_class_t H5FD_stdio_g = { H5FD_stdio_get_handle, /* get_handle */ H5FD_stdio_read, /* read */ H5FD_stdio_write, /* write */ + NULL, /* read_vector */ + NULL, /* write_vector */ + NULL, /* read_selection */ + NULL, /* write_selection */ H5FD_stdio_flush, /* flush */ H5FD_stdio_truncate, /* truncate */ H5FD_stdio_lock, /* lock */ diff --git a/src/H5Fio.c b/src/H5Fio.c index 5a9d2c1..123fe40 100644 --- a/src/H5Fio.c +++ b/src/H5Fio.c @@ -233,12 +233,103 @@ H5F_block_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, const void /* Pass through page buffer layer */ if (H5PB_write(f->shared, map_type, addr, size, buf) < 0) HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "write through page buffer failed") - done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5F_block_write() */ /*------------------------------------------------------------------------- + * Function: H5F_shared_select_read + * + * Purpose: Reads some data from a file/server/etc into a buffer. + * The location of the data is defined by the mem_spaces and + * file_spaces dataspace arrays, along with the offsets + * array. The addresses is relative to the base address for + * the file. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * May 3 2021 + * + *------------------------------------------------------------------------- + */ +herr_t +H5F_shared_select_read(H5F_shared_t *f_sh, H5FD_mem_t type, uint32_t count, const H5S_t *const *mem_spaces, + const H5S_t *const *file_spaces, haddr_t offsets[], size_t element_sizes[], + void *bufs[] /* out */) +{ + H5FD_mem_t map_type; /* Mapped memory type */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(f_sh); + HDassert((mem_spaces) || (count == 0)); + HDassert((file_spaces) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Treat global heap as raw data */ + map_type = (type == H5FD_MEM_GHEAP) ? H5FD_MEM_DRAW : type; + + /* Pass down to file driver layer (bypass page buffer for now) */ + if (H5FD_read_selection(f_sh->lf, map_type, count, mem_spaces, file_spaces, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "selection read through file driver failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_shared_select_read() */ + +/*------------------------------------------------------------------------- + * Function: H5F_shared_select_write + * + * Purpose: Writes some data from a buffer to a file/server/etc. + * The location of the data is defined by the mem_spaces and + * file_spaces dataspace arrays, along with the offsets + * array. The addresses is relative to the base address for + * the file. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * May 4 2021 + * + *------------------------------------------------------------------------- + */ +herr_t +H5F_shared_select_write(H5F_shared_t *f_sh, H5FD_mem_t type, uint32_t count, const H5S_t *const *mem_spaces, + const H5S_t *const *file_spaces, haddr_t offsets[], size_t element_sizes[], + const void *bufs[]) +{ + H5FD_mem_t map_type; /* Mapped memory type */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(f_sh); + HDassert((mem_spaces) || (count == 0)); + HDassert((file_spaces) || (count == 0)); + HDassert((offsets) || (count == 0)); + HDassert((element_sizes) || (count == 0)); + HDassert((bufs) || (count == 0)); + + /* Treat global heap as raw data */ + map_type = (type == H5FD_MEM_GHEAP) ? H5FD_MEM_DRAW : type; + + /* Pass down to file driver layer (bypass page buffer for now) */ + if (H5FD_write_selection(f_sh->lf, map_type, count, mem_spaces, file_spaces, offsets, element_sizes, + bufs) < 0) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "selection write through file driver failed") + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5F_shared_select_write() */ + +/*------------------------------------------------------------------------- * Function: H5F_flush_tagged_metadata * * Purpose: Flushes metadata with specified tag in the metadata cache diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index 051abd9..a6c2f8a 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -760,6 +760,7 @@ struct H5O_loc_t; struct H5HG_heap_t; struct H5VL_class_t; struct H5P_genplist_t; +struct H5S_t; /* Forward declarations for anonymous H5F objects */ @@ -924,6 +925,16 @@ H5_DLL herr_t H5F_shared_block_write(H5F_shared_t *f_sh, H5FD_mem_t type, haddr_ const void *buf); H5_DLL herr_t H5F_block_write(H5F_t *f, H5FD_mem_t type, haddr_t addr, size_t size, const void *buf); +/* Functions that operate on selections of elements in the file */ +H5_DLL herr_t H5F_shared_select_read(H5F_shared_t *f_sh, H5FD_mem_t type, uint32_t count, + const struct H5S_t *const *mem_spaces, + const struct H5S_t *const *file_spaces, haddr_t offsets[], + size_t element_sizes[], void *bufs[] /* out */); +H5_DLL herr_t H5F_shared_select_write(H5F_shared_t *f_sh, H5FD_mem_t type, uint32_t count, + const struct H5S_t *const *mem_spaces, + const struct H5S_t *const *file_spaces, haddr_t offsets[], + size_t element_sizes[], const void *bufs[]); + /* Functions that flush or evict */ H5_DLL herr_t H5F_flush_tagged_metadata(H5F_t *f, haddr_t tag); H5_DLL herr_t H5F_evict_tagged_metadata(H5F_t *f, haddr_t tag); @@ -1306,6 +1306,73 @@ done: } /* end H5PB_write() */ /*------------------------------------------------------------------------- + * Function: H5PB_enabled + * + * Purpose: Check if the page buffer may be enabled for the specified + * file and data access type. + * + * Return: Non-negative on success/Negative on failure + * + * Programmer: Neil Fortner + * + *------------------------------------------------------------------------- + */ +htri_t +H5PB_enabled(H5F_shared_t *f_sh, H5FD_mem_t type) +{ + H5PB_t *page_buf; /* Page buffering info for this file */ + hbool_t bypass_pb = FALSE; /* Whether to bypass page buffering */ + htri_t ret_value; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + + /* Sanity checks */ + HDassert(f_sh); + + /* Get pointer to page buffer info for this file */ + page_buf = f_sh->page_buf; + +#ifdef H5_HAVE_PARALLEL + if (H5F_SHARED_HAS_FEATURE(f_sh, H5FD_FEAT_HAS_MPI)) { +#if 1 + bypass_pb = TRUE; +#else + /* MSC - why this stopped working ? */ + int mpi_size; + + if ((mpi_size = H5F_shared_mpi_get_size(f_sh)) < 0) + HGOTO_ERROR(H5E_PAGEBUF, H5E_CANTGET, FAIL, "can't retrieve MPI communicator size") + if (1 != mpi_size) + bypass_pb = TRUE; +#endif + } /* end if */ +#endif + + /* If page buffering is disabled, or the I/O size is larger than that of a + * single page, or if this is a parallel raw data access, bypass page + * buffering. + */ + if (NULL == page_buf || (bypass_pb && H5FD_MEM_DRAW == type)) { + /* Update statistics, since wherever this function is called, if it + * returns FALSE, the calling function performs I/O avoiding the page + * buffer layer */ + if (page_buf) { + HDassert(type == H5FD_MEM_DRAW); + page_buf->bypasses[1]++; + } /* end if */ + + /* Page buffer is disabled, at least for this data access type */ + ret_value = FALSE; + } /* end if */ + else + /* Page buffer may be enabled */ + ret_value = TRUE; + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5PB_enabled() */ + +/*------------------------------------------------------------------------- * Function: H5PB__insert_entry() * * Purpose: This function was created without documentation. diff --git a/src/H5PBprivate.h b/src/H5PBprivate.h index e0197bf..2fc70c6 100644 --- a/src/H5PBprivate.h +++ b/src/H5PBprivate.h @@ -91,6 +91,7 @@ H5_DLL herr_t H5PB_update_entry(H5PB_t *page_buf, haddr_t addr, size_t size, con H5_DLL herr_t H5PB_remove_entry(const H5F_shared_t *f_sh, haddr_t addr); H5_DLL herr_t H5PB_read(H5F_shared_t *f_sh, H5FD_mem_t type, haddr_t addr, size_t size, void *buf /*out*/); H5_DLL herr_t H5PB_write(H5F_shared_t *f_sh, H5FD_mem_t type, haddr_t addr, size_t size, const void *buf); +H5_DLL htri_t H5PB_enabled(H5F_shared_t *f_sh, H5FD_mem_t type); /* Statistics routines */ H5_DLL herr_t H5PB_reset_stats(H5PB_t *page_buf); diff --git a/src/H5private.h b/src/H5private.h index 2318f10..d1275dc 100644 --- a/src/H5private.h +++ b/src/H5private.h @@ -2000,6 +2000,11 @@ extern hbool_t H5_libterm_g; /* Is the library being shutdown? */ #endif /* H5_HAVE_THREADSAFE */ +/* Extern global to determine if we shoudl use selection I/O if available (this + * variable should be removed once selection I/O performs as well as the + * previous scalar I/O implementation */ +extern hbool_t H5_use_selection_io_g; + #ifdef H5_HAVE_CODESTACK /* Include required function stack header */ |