author      Richard Warren <Richard.Warren@hdfgroup.org>    2017-07-13 17:44:47 (GMT)
committer   Richard Warren <Richard.Warren@hdfgroup.org>    2017-07-13 17:44:47 (GMT)
commit      516f0e3661d305419a817f44b3fdaf0337a548c3 (patch)
tree        8395a8f0e190a5cd4bb10798081d707ad9385b68
parent      2da8e74cc5ca9c0bdae85189f351fbe9c5f40500 (diff)
parent      6a5aa46e936340ed540359290374fa909f9213a6 (diff)
Merge pull request #596 in HDFFV/hdf5 from GreaterThan-2GB-MPIO to develop
* commit '6a5aa46e936340ed540359290374fa909f9213a6':
    Added a brief outline for Large MPI-IO transfers into RELEASE.txt
    Fix up the ExpressMode check for skipping slow running tests.
    Include code fixes and additional modifications pointed out by code reviewers
    Commited changes to the development branch here to allow a pull request to be published
    Commit changes needed for pull request
-rw-r--r--    MANIFEST                       1
-rw-r--r--    release_docs/RELEASE.txt      24
-rw-r--r--    src/H5Smpio.c                504
-rw-r--r--    testpar/CMakeLists.txt         1
-rw-r--r--    testpar/Makefile.am            2
-rw-r--r--    testpar/t_bigio.c           2153
6 files changed, 2637 insertions, 48 deletions
diff --git a/MANIFEST b/MANIFEST
index 475b674..27f38be 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1222,6 +1222,7 @@
./testpar/COPYING
./testpar/Makefile.am
+./testpar/t_bigio.c
./testpar/t_cache.c
./testpar/t_cache_image.c
./testpar/t_chunk_alloc.c
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 20d58b3..4335b37 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -62,7 +62,29 @@ New Features
Parallel Library:
-----------------
- -
+ - Large MPI-IO transfers
+
+ Previous releases of PHDF5 would fail when attempting to
+ read or write greater than 2GB of data in a single IO operation.
+ This issue stems principally from the MPI API, whose definitions
+ utilize 32 bit integers to describe the number of data elements
+ and the datatype that MPI should use to effect a data transfer.
+ Historically, HDF5 has invoked MPI-IO with the number of
+ elements in a contiguous buffer represented as the length
+ of that buffer in bytes.
+
+ Resolving the issue and thus enabling larger MPI-IO transfers
+ is accomplished first, by detecting when a user IO request would
+ exceed the 2GB limit as described above. Once a transfer request
+ is identified as requiring special handling, PHDF5 now creates a
+ derived datatype consisting of a vector of fixed sized blocks
+ which is in turn wrapped within a single MPI_Type_struct to
+ contain the vector and any remaining data. The newly created
+ datatype is then used in place of MPI_BYTE and can be used to
+ fulfill the original user request without encountering API
+ errors.
+
+ (RAW – 2017/07/11, HDFFV-8839)
Fortran Library:
----------------
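
To make the construction described in the RELEASE note above concrete, the following is a minimal standalone sketch in plain MPI C (an editorial illustration, not the H5Smpio.c implementation; the 3 GiB transfer size and the BIG_COUNT constant are assumed for the example). It splits a contiguous buffer into fixed-size blocks, describes the remainder separately, and wraps both pieces in a single MPI_Type_create_struct so that a count of 1 covers the whole buffer:

/* Editorial sketch only -- illustrative sizes, not HDF5 source. */
#include <mpi.h>
#include <stdio.h>

#define BIG_COUNT 536870911ULL          /* (2^29)-1 elements per block */

int main(int argc, char **argv)
{
    unsigned long long total_bytes = 3ULL * 1024 * 1024 * 1024;   /* 3 GiB, assumed */
    int num_big  = (int)(total_bytes / BIG_COUNT);
    int leftover = (int)(total_bytes - (unsigned long long)num_big * BIG_COUNT);

    MPI_Datatype inner, outer, rest, big;
    MPI_Datatype types[2];
    int          blens[2] = {1, 1};
    MPI_Aint     disps[2];

    MPI_Init(&argc, &argv);

    /* A vector of fixed-size blocks: BIG_COUNT bytes, repeated num_big times */
    MPI_Type_contiguous((int)BIG_COUNT, MPI_BYTE, &inner);
    MPI_Type_contiguous(num_big, inner, &outer);

    /* The remaining (< 2 GB) tail of the buffer */
    MPI_Type_contiguous(leftover, MPI_BYTE, &rest);

    /* Wrap both pieces in a single struct type at the correct byte offsets */
    types[0] = outer;   disps[0] = 0;
    types[1] = rest;    disps[1] = (MPI_Aint)num_big * (MPI_Aint)BIG_COUNT;
    MPI_Type_create_struct(2, blens, disps, types, &big);
    MPI_Type_commit(&big);

    /* 'big' with a count of 1 now describes the whole 3 GiB buffer, so an
     * MPI-IO call such as MPI_File_write_at(fh, 0, buf, 1, big, &st) never
     * needs a byte count larger than a 32 bit integer. */
    printf("num_big=%d leftover=%d bytes\n", num_big, leftover);

    MPI_Type_free(&inner);
    MPI_Type_free(&outer);
    MPI_Type_free(&rest);
    MPI_Type_free(&big);
    MPI_Finalize();
    return 0;
}
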
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index c24c455..46f7a59 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -33,7 +33,7 @@
#include "H5Oprivate.h" /* Object headers */
#include "H5Pprivate.h" /* Property lists */
#include "H5Spkg.h" /* Dataspaces */
-#include "H5VMprivate.h" /* Vector and array functions */
+#include "H5VMprivate.h" /* Vector and array functions */
#ifdef H5_HAVE_PARALLEL
@@ -55,9 +55,42 @@ static herr_t H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
MPI_Datatype *new_type, int *count, hbool_t *is_derived_type);
static herr_t H5S_obtain_datatype(const hsize_t down[], H5S_hyper_span_t* span,
const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size);
+static herr_t H5S_mpio_create_large_type(hsize_t, MPI_Aint, MPI_Datatype, MPI_Datatype *);
+
#define H5S_MPIO_INITIAL_ALLOC_COUNT 256
+#define TWO_GIG_LIMIT 2147483648
+
+#ifndef H5S_MAX_MPI_COUNT
+#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */
+#endif
+
+static hsize_t bigio_count = H5S_MAX_MPI_COUNT;
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_set_bigio_count
+ *
+ * Purpose: Allow us to programmatically change the switch point
+ * when we utilize derived datatypes. This is of
+ * particular interest for allowing nightly testing
+ *
+ * Return: the current/previous value of bigio_count.
+ *
+ * Programmer: Richard Warren, March 10, 2017
+ *
+ *-------------------------------------------------------------------------
+ */
+hsize_t
+H5S_mpio_set_bigio_count(hsize_t new_count)
+{
+ hsize_t orig_count = bigio_count;
+ if ((new_count > 0) && (new_count < TWO_GIG_LIMIT)) {
+ bigio_count = new_count;
+ }
+ return orig_count;
+}
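
/* A minimal usage sketch (editorial illustration, not part of this patch):
 * how an internal caller -- for example a parallel regression test -- might
 * use the hook above to force the derived-datatype path with small buffers.
 * The extern declaration simply restates the prototype added in this patch;
 * whether the symbol is reachable from a given caller is an assumption. */

#include "H5public.h"   /* hsize_t */

extern hsize_t H5S_mpio_set_bigio_count(hsize_t new_count);

static void
exercise_bigio_path(void)
{
    /* Lower the switch point to 64K elements so even modest transfers
     * exercise the large-datatype construction. */
    hsize_t prev = H5S_mpio_set_bigio_count(65536);

    /* ... perform collective H5Dwrite()/H5Dread() calls here ... */

    /* Restore the previous threshold. */
    H5S_mpio_set_bigio_count(prev);
}
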
+
/*-------------------------------------------------------------------------
* Function: H5S_mpio_all_type
@@ -72,6 +105,11 @@ static herr_t H5S_obtain_datatype(const hsize_t down[], H5S_hyper_span_t* span,
* *is_derived_type 0 if MPI primitive type, 1 if derived
*
* Programmer: rky 980813
+ * Modifications:
+ * Mohamad Chaarawi
+ * Adding support for large datatypes (beyond the limit of a
+ * 32 bit integer).
+ *
*
*-------------------------------------------------------------------------
*/
@@ -95,11 +133,22 @@ H5S_mpio_all_type(const H5S_t *space, size_t elmt_size,
H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);
total_bytes = (hsize_t)elmt_size * nelmts;
-
- /* fill in the return values */
- *new_type = MPI_BYTE;
- H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t);
- *is_derived_type = FALSE;
+ /* Verify that the size can be expressed as a 32 bit integer */
+ if(bigio_count >= total_bytes) {
+ /* fill in the return values */
+ *new_type = MPI_BYTE;
+ H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t);
+ *is_derived_type = FALSE;
+ }
+ else {
+ /* Create a LARGE derived datatype for this transfer */
+ if (H5S_mpio_create_large_type (total_bytes, 0, MPI_BYTE, new_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large datatype from the all selection")
+ }
+ *count = 1;
+ *is_derived_type = TRUE;
+ }
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -167,27 +216,103 @@ H5S_mpio_create_point_datatype (size_t elmt_size, hsize_t num_points,
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
elmt_type_created = TRUE;
+ /* Check whether standard or BIGIO processing will be employed */
+ if(bigio_count >= num_points) {
#if MPI_VERSION >= 3
- /* Create an MPI datatype for the whole point selection */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code)
+ /* Create an MPI datatype for the whole point selection */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code)
#else
- /* Allocate block sizes for MPI datatype call */
- if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points)))
- HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
+ /* Allocate block sizes for MPI datatype call */
+ if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points)))
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
- for(u = 0; u < num_points; u++)
- blocks[u] = 1;
+ for(u = 0; u < num_points; u++)
+ blocks[u] = 1;
- /* Create an MPI datatype for the whole point selection */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code)
+ /* Create an MPI datatype for the whole point selection */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
#endif
- /* Commit MPI datatype for later use */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+ /* Commit MPI datatype for later use */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+ }
+ else {
+ /* use LARGE_DATATYPE::
+ * We'll create an hindexed_block type for every 2G point count and then combine
+ * those and any remaining points into a single large datatype.
+ */
+ int total_types, i;
+ int remaining_points;
+ int num_big_types;
+ hsize_t leftover;
+
+ int *inner_blocks;
+ MPI_Aint *inner_disps;
+ MPI_Datatype *inner_types = NULL;
+
+ /* Calculate how many Big MPI datatypes are needed to represent the buffer */
+ num_big_types = (int)(num_points/bigio_count);
+
+ leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count;
+ H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t);
+
+ total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types;
+
+ /* Allocate array of MPI derived types needed */
+ if(NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types))))
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
+
+ if(NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types)))
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
+
+ if(NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types)))
+ HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks")
+
+ for(i=0 ; i<num_big_types ; i++) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(bigio_count,
+ 1,
+ &disp[i*bigio_count],
+ elmt_type,
+ &inner_types[i]))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
+ }
+ inner_blocks[i] = 1;
+ inner_disps[i] = 0;
+ }
+
+ if(remaining_points) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(remaining_points,
+ 1,
+ &disp[num_big_types*bigio_count],
+ elmt_type,
+ &inner_types[num_big_types]))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code);
+ }
+ inner_blocks[num_big_types] = 1;
+ inner_disps[num_big_types] = 0;
+ }
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types,
+ inner_blocks,
+ inner_disps,
+ inner_types,
+ new_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct", mpi_code);
+ }
+ for(i=0 ; i<total_types ; i++)
+ MPI_Type_free(&inner_types[i]);
+
+ H5MM_free(inner_types);
+ H5MM_free(inner_blocks);
+ H5MM_free(inner_disps);
+ /* Commit MPI datatype for later use */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+ }
done:
if(elmt_type_created)
MPI_Type_free(&elmt_type);
@@ -481,7 +606,10 @@ done:
* *is_derived_type 0 if MPI primitive type, 1 if derived
*
* Programmer: rky 980813
- *
+ * Modifications:
+ * Mohamad Chaarawi
+ * Adding support for large datatypes (beyond the limit of a
+ * 32 bit integer).
*-------------------------------------------------------------------------
*/
static herr_t
@@ -636,8 +764,25 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size,
HDfprintf(H5DEBUG(S), "d[%d].xtent=%Hu \n", i, d[i].xtent);
}
#endif
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type)))
+
+ /* LARGE_DATATYPE::
+ * Check if the number of elements to form the inner type fits into a 32 bit integer.
+ * If yes then just create the innertype with MPI_Type_contiguous.
+ * Otherwise create a compound datatype by iterating as many times as needed
+ * for the innertype to be created.
+ */
+ if(bigio_count >= elmt_size) {
+ /* Use a single MPI datatype that has a 32 bit size */
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+ else {
+ /* Create the compound datatype for this operation (> 2GB) */
+ if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &inner_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large inner datatype in hyper selection")
+ }
+ }
/*******************************************************
* Construct the type by walking the hyperslab dims
@@ -645,30 +790,93 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size,
*******************************************************/
for(i = ((int)rank) - 1; i >= 0; --i) {
#ifdef H5S_DEBUG
- if(H5DEBUG(S))
- HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n"
- "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n",
- FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]);
+ if(H5DEBUG(S))
+ HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n"
+ "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n",
+ FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]);
#endif
#ifdef H5S_DEBUG
- if(H5DEBUG(S))
- HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i);
+ if(H5DEBUG(S))
+ HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i);
#endif
/****************************************
* Build vector type of the selection.
****************************************/
- mpi_code = MPI_Type_vector((int)(d[i].count), /* count */
- (int)(d[i].block), /* blocklength */
- (int)(d[i].strid), /* stride */
- inner_type, /* old type */
- &outer_type); /* new type */
+ if (bigio_count >= d[i].count &&
+ bigio_count >= d[i].block &&
+ bigio_count >= d[i].strid) {
+
+ /* All the parameters fit into 32 bit integers so create the vector type normally */
+ mpi_code = MPI_Type_vector((int)(d[i].count), /* count */
+ (int)(d[i].block), /* blocklength */
+ (int)(d[i].strid), /* stride */
+ inner_type, /* old type */
+ &outer_type); /* new type */
+
+ MPI_Type_free(&inner_type);
+ if(mpi_code != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code)
+ }
+ else {
+ /* Things get a bit more complicated and require LARGE_DATATYPE processing
+ * There are two MPI datatypes that need to be created:
+ * 1) an internal contiguous block; and
+ * 2) a collection of elements where an element is a contiguous block(1).
+ * Remember that the input arguments to the MPI-IO functions use integer
+ * values to represent element counts. We ARE allowed, however, in the
+ * more recent MPI implementations to use constructed datatypes whereby
+ * the total number of bytes in a transfer could be:
+ * (2GB-1) * number_of_blocks * the_datatype_extent.
+ */
+
+ MPI_Aint stride_in_bytes, inner_extent;
+ MPI_Datatype block_type;
+
+ /* create a contiguous datatype inner_type x number of BLOCKS.
+ * Again we need to check that the number of BLOCKS can fit into
+ * a 32 bit integer */
+ if (bigio_count < d[i].block) {
+ if (H5S_mpio_create_large_type(d[i].block, 0, inner_type,
+ &block_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large block datatype in hyper selection")
+ }
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)d[i].block,
+ inner_type,
+ &block_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
- MPI_Type_free(&inner_type);
- if(mpi_code != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code)
+ MPI_Type_extent (inner_type, &inner_extent);
+ stride_in_bytes = inner_extent * (MPI_Aint)d[i].strid;
- /****************************************
+ /* If the element count is larger than what a 32 bit integer can hold,
+ * we call the large type creation function to handle that
+ */
+ if (bigio_count < d[i].count) {
+ if (H5S_mpio_create_large_type (d[i].count, stride_in_bytes, block_type,
+ &outer_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large outer datatype in hyper selection")
+ }
+ }
+ /* otherwise a regular create_hvector will do */
+ else {
+ mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */
+ 1, /* blocklength */
+ stride_in_bytes, /* stride in bytes*/
+ block_type, /* old type */
+ &outer_type); /* new type */
+ if(MPI_SUCCESS != mpi_code)
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+ }
+ MPI_Type_free(&block_type);
+ MPI_Type_free(&inner_type);
+ }
+ /****************************************
* Then build the dimension type as (start, vector type, xtent).
****************************************/
/* calculate start and extent values of this dimension */
@@ -752,6 +960,10 @@ done:
*
* Programmer: kyang
*
+ * Modifications:
+ * Mohamad Chaarawi
+ * Adding support for large datatypes (beyond the limit of a
+ * 32 bit integer).
*-------------------------------------------------------------------------
*/
static herr_t
@@ -774,8 +986,17 @@ H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
HDassert(space->select.sel_info.hslab->span_lst->head);
/* Create the base type for an element */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ if (bigio_count >= elmt_size) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+ }
+ else {
+ if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &elmt_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large element datatype in span_hyper selection")
+ }
+ }
elmt_type_is_derived = TRUE;
/* Compute 'down' sizes for each dimension */
@@ -821,14 +1042,15 @@ static herr_t
H5S_obtain_datatype(const hsize_t *down, H5S_hyper_span_t *span,
const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size)
{
- size_t alloc_count; /* Number of span tree nodes allocated at this level */
- size_t outercount; /* Number of span tree nodes at this level */
+ size_t alloc_count = 0; /* Number of span tree nodes allocated at this level */
+ size_t outercount = 0; /* Number of span tree nodes at this level */
MPI_Datatype *inner_type = NULL;
hbool_t inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */
hbool_t span_type_valid = FALSE; /* Whether the span_type MPI datatypes is valid */
+ hbool_t large_block = FALSE; /* Whether the block length is larger than a 32 bit integer */
int *blocklen = NULL;
MPI_Aint *disp = NULL;
- H5S_hyper_span_t *tspan; /* Temporary pointer to span tree node */
+ H5S_hyper_span_t *tspan = NULL; /* Temporary pointer to span tree node */
int mpi_code; /* MPI return status code */
herr_t ret_value = SUCCEED; /* Return value */
@@ -870,14 +1092,70 @@ H5S_obtain_datatype(const hsize_t *down, H5S_hyper_span_t *span,
disp[outercount] = (MPI_Aint)elmt_size * tspan->low;
H5_CHECK_OVERFLOW(tspan->nelem, hsize_t, int)
blocklen[outercount] = (int)tspan->nelem;
-
tspan = tspan->next;
+
+ if (bigio_count < blocklen[outercount]) {
+ large_block = TRUE; /* at least one block type is large, so set this flag to true */
+ }
+
outercount++;
} /* end while */
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, span_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
- span_type_valid = TRUE;
+ /* Everything fits into integers, so cast them and use hindexed */
+ if (bigio_count >= outercount && large_block == FALSE) {
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, span_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+ span_type_valid = TRUE;
+ }
+ else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */
+ size_t i;
+
+ for (i=0 ; i<outercount ; i++) {
+ MPI_Datatype temp_type = MPI_DATATYPE_NULL, outer_type = MPI_DATATYPE_NULL;
+ /* create the block type from elmt_type while checking the 32 bit int limit */
+ if (blocklen[i] > bigio_count) {
+ if (H5S_mpio_create_large_type (blocklen[i], 0, *elmt_type, &temp_type) < 0) {
+ HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,
+ "couldn't create a large element datatype in span_hyper selection")
+ }
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)blocklen[i],
+ *elmt_type,
+ &temp_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+
+ /* combine the current datatype that is created with this current block type */
+ if (0 == i) { /* first iteration, there is no combined datatype yet */
+ *span_type = temp_type;
+ }
+ else {
+ int bl[2] = {1,1};
+ MPI_Aint ds[2] = {disp[i-1],disp[i]};
+ MPI_Datatype dt[2] = {*span_type, temp_type};
+
+ if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct (2, /* count */
+ bl, /* blocklength */
+ ds, /* stride in bytes*/
+ dt, /* old type */
+ &outer_type))){ /* new type */
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+ }
+ *span_type = outer_type;
+ }
+
+ if (outer_type != MPI_DATATYPE_NULL)
+ MPI_Type_free(&outer_type);
+ /* temp_type shouldn't be freed here...
+ * Note that we have simply copied it above (not MPI_Type_dup)
+ * into the 'span_type' argument of the caller.
+ * The caller needs to deal with it there!
+ */
+ }
+ } /* end (LARGE_DATATYPE::) */
+
} /* end if */
else {
size_t u; /* Local index variable */
@@ -1091,5 +1369,139 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S_mpio_space_type() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_create_large_type
+ *
+ * Purpose: Create a large datatype of size larger than what a 32 bit integer
+ * can hold.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * *new_type the new datatype created
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t H5S_mpio_create_large_type (hsize_t num_elements,
+ MPI_Aint stride_bytes,
+ MPI_Datatype old_type,
+ MPI_Datatype *new_type)
+{
+ int num_big_types; /* num times the 2G datatype will be repeated */
+ int remaining_bytes; /* the number of bytes left that can be held in an int value */
+ hsize_t leftover;
+ int block_len[2];
+ int mpi_code; /* MPI return code */
+ MPI_Datatype inner_type, outer_type, leftover_type, type[2];
+ MPI_Aint disp[2], old_extent;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+ /* Calculate how many Big MPI datatypes are needed to represent the buffer */
+ num_big_types = (int)(num_elements/bigio_count);
+ leftover = num_elements - num_big_types * (hsize_t)bigio_count;
+ H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
+
+ /* Create a contiguous datatype of size equal to the largest
+ * number that a 32 bit integer can hold x size of old type.
+ * If the displacement is 0, then the type is contiguous, otherwise
+ * use type_hvector to create the type with the displacement provided
+ */
+ if (0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count,
+ old_type,
+ &inner_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (bigio_count,
+ 1,
+ stride_bytes,
+ old_type,
+ &inner_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+ }
+ }
+
+ /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
+ * If a stride is present, use hvector type
+ */
+ if (0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types,
+ inner_type,
+ &outer_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (num_big_types,
+ 1,
+ stride_bytes,
+ inner_type,
+ &outer_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+ }
+ }
+
+ MPI_Type_free(&inner_type);
+
+ /* If there is a remaining part create a contiguous/vector datatype and then
+ * use a struct datatype to encapsulate everything.
+ */
+ if(remaining_bytes) {
+ if (stride_bytes == 0) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous (remaining_bytes,
+ old_type,
+ &leftover_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ }
+ }
+ else {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector
+ ((int)(num_elements - (hsize_t)num_big_types*bigio_count),
+ 1,
+ stride_bytes,
+ old_type,
+ &leftover_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+ }
+ }
+
+ MPI_Type_extent (old_type, &old_extent);
+
+ /* Set up the arguments for MPI_Type_struct constructor */
+ type[0] = outer_type;
+ type[1] = leftover_type;
+ block_len[0] = 1;
+ block_len[1] = 1;
+ disp[0] = 0;
+ disp[1] = (old_extent+stride_bytes)*num_big_types*(MPI_Aint)bigio_count;
+
+ if(MPI_SUCCESS != (mpi_code =
+ MPI_Type_create_struct(2, block_len, disp, type, new_type))) {
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+ }
+
+ MPI_Type_free(&outer_type);
+ MPI_Type_free(&leftover_type);
+ }
+ else {
+ /* There are no remaining bytes so just set the new type to
+ * the outer type created */
+ *new_type = outer_type;
+ }
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5S_mpio_create_large_type */
+
#endif /* H5_HAVE_PARALLEL */
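
For orientation, a small standalone arithmetic sketch (plain C, an editorial illustration with an assumed 5,000,000,000-byte MPI_BYTE transfer and no stride) showing how the split performed by H5S_mpio_create_large_type above works out in numbers:

/* Standalone arithmetic illustration -- not HDF5 source. */
#include <stdio.h>

#define BIGIO_COUNT 536870911ULL        /* (2^29)-1, the default switch point */

int main(void)
{
    unsigned long long num_elements = 5000000000ULL;  /* assumed: 5 GB of MPI_BYTE */
    unsigned long long old_extent   = 1;               /* extent of MPI_BYTE */

    int num_big_types = (int)(num_elements / BIGIO_COUNT);
    unsigned long long leftover =
        num_elements - (unsigned long long)num_big_types * BIGIO_COUNT;

    /* Displacement of the leftover piece in the final struct type, i.e.
     * (old_extent + stride_bytes) * num_big_types * bigio_count with
     * stride_bytes == 0 for a contiguous buffer. */
    unsigned long long disp1 =
        old_extent * (unsigned long long)num_big_types * BIGIO_COUNT;

    printf("num_big_types = %d\n", num_big_types);      /* 9          */
    printf("leftover      = %llu\n", leftover);         /* 168161801  */
    printf("disp[1]       = %llu bytes\n", disp1);      /* 4831838199 */
    return 0;
}
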
diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt
index 298d326..e994b65 100644
--- a/testpar/CMakeLists.txt
+++ b/testpar/CMakeLists.txt
@@ -43,6 +43,7 @@ ENDMACRO (ADD_H5P_EXE file)
set (H5P_TESTS
t_mpi
+ t_bigio
t_cache
t_pflush1
t_pflush2
diff --git a/testpar/Makefile.am b/testpar/Makefile.am
index b87c1df..7029bd5 100644
--- a/testpar/Makefile.am
+++ b/testpar/Makefile.am
@@ -23,7 +23,7 @@ AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/test
# Test programs. These are our main targets.
#
-TEST_PROG_PARA=t_mpi testphdf5 t_cache t_cache_image t_pflush1 t_pflush2 t_pshutdown t_prestart t_init_term t_shapesame
+TEST_PROG_PARA=t_mpi t_bigio testphdf5 t_cache t_cache_image t_pflush1 t_pflush2 t_pshutdown t_prestart t_init_term t_shapesame
check_PROGRAMS = $(TEST_PROG_PARA)
diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c
new file mode 100644
index 0000000..a4a1323
--- /dev/null
+++ b/testpar/t_bigio.c
@@ -0,0 +1,2153 @@
+
+#include "hdf5.h"
+#include "testphdf5.h"
+#include "H5Dprivate.h" /* For Chunk tests */
+
+// int TestVerbosity = VERBO_LO; /* Default Verbosity is Low */
+
+/* Constants definitions */
+#define MAX_ERR_REPORT 10 /* Maximum number of errors reported */
+
+/* Define some handy debugging shorthands, routines, ... */
+/* debugging tools */
+
+#define MAINPROCESS (!mpi_rank) /* define process 0 as main process */
+
+/* Constants definitions */
+#define RANK 2
+
+#define IN_ORDER 1
+#define OUT_OF_ORDER 2
+
+#define DATASET1 "DSET1"
+#define DATASET2 "DSET2"
+#define DATASET3 "DSET3"
+#define DATASET4 "DSET4"
+#define DATASET5 "DSET5"
+#define DXFER_COLLECTIVE_IO 0x1 /* Collective IO*/
+#define DXFER_INDEPENDENT_IO 0x2 /* Independent IO collectively */
+#define DXFER_BIGCOUNT 536870916
+
+#define HYPER 1
+#define POINT 2
+#define ALL 3
+
+/* Dataset data type. Ints can be easily octal dumped. */
+typedef hsize_t B_DATATYPE;
+
+int facc_type = FACC_MPIO; /*Test file access type */
+int dxfer_coll_type = DXFER_COLLECTIVE_IO;
+size_t bigcount = DXFER_BIGCOUNT;
+char filename[20] = "bigio_test.h5";
+int nerrors = 0;
+int mpi_size, mpi_rank;
+
+hsize_t space_dim1 = SPACE_DIM1 * 256; // 4096
+hsize_t space_dim2 = SPACE_DIM2;
+
+static void coll_chunktest(const char* filename, int chunk_factor, int select_factor,
+ int api_option, int file_selection, int mem_selection, int mode);
+hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);
+
+/*
+ * Setup the coordinates for point selection.
+ */
+static void
+set_coords(hsize_t start[],
+ hsize_t count[],
+ hsize_t stride[],
+ hsize_t block[],
+ size_t num_points,
+ hsize_t coords[],
+ int order)
+{
+ hsize_t i,j, k = 0, m ,n, s1 ,s2;
+
+ if(OUT_OF_ORDER == order)
+ k = (num_points * RANK) - 1;
+ else if(IN_ORDER == order)
+ k = 0;
+
+ s1 = start[0];
+ s2 = start[1];
+
+ for(i = 0 ; i < count[0]; i++)
+ for(j = 0 ; j < count[1]; j++)
+ for(m = 0 ; m < block[0]; m++)
+ for(n = 0 ; n < block[1]; n++)
+ if(OUT_OF_ORDER == order) {
+ coords[k--] = s2 + (stride[1] * j) + n;
+ coords[k--] = s1 + (stride[0] * i) + m;
+ }
+ else if(IN_ORDER == order) {
+ coords[k++] = s1 + stride[0] * i + m;
+ coords[k++] = s2 + stride[1] * j + n;
+ }
+}
+
+/*
+ * Fill the dataset with trivial data for testing.
+ * Assume dimension rank is 2 and data is stored contiguously.
+ */
+static void
+fill_datasets(hsize_t start[], hsize_t block[], B_DATATYPE * dataset)
+{
+ B_DATATYPE *dataptr = dataset;
+ hsize_t i, j;
+
+ /* put some trivial data in the data_array */
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ *dataptr = (B_DATATYPE)((i+start[0])*100 + (j+start[1]+1));
+ dataptr++;
+ }
+ }
+}
+
+/*
+ * Setup the coordinates for point selection.
+ */
+void point_set(hsize_t start[],
+ hsize_t count[],
+ hsize_t stride[],
+ hsize_t block[],
+ size_t num_points,
+ hsize_t coords[],
+ int order)
+{
+ hsize_t i,j, k = 0, m ,n, s1 ,s2;
+
+ HDcompile_assert(RANK == 2);
+
+ if(OUT_OF_ORDER == order)
+ k = (num_points * RANK) - 1;
+ else if(IN_ORDER == order)
+ k = 0;
+
+ s1 = start[0];
+ s2 = start[1];
+
+ for(i = 0 ; i < count[0]; i++)
+ for(j = 0 ; j < count[1]; j++)
+ for(m = 0 ; m < block[0]; m++)
+ for(n = 0 ; n < block[1]; n++)
+ if(OUT_OF_ORDER == order) {
+ coords[k--] = s2 + (stride[1] * j) + n;
+ coords[k--] = s1 + (stride[0] * i) + m;
+ }
+ else if(IN_ORDER == order) {
+ coords[k++] = s1 + stride[0] * i + m;
+ coords[k++] = s2 + stride[1] * j + n;
+ }
+
+ if(VERBOSE_MED) {
+ printf("start[]=(%lu, %lu), count[]=(%lu, %lu), stride[]=(%lu, %lu), block[]=(%lu, %lu), total datapoints=%lu\n",
+ (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1],
+ (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1],
+ (unsigned long)(block[0] * block[1] * count[0] * count[1]));
+ k = 0;
+ for(i = 0; i < num_points ; i++) {
+ printf("(%d, %d)\n", (int)coords[k], (int)coords[k + 1]);
+ k += 2;
+ }
+ }
+}
+
+/*
+ * Print the content of the dataset.
+ */
+static void
+dataset_print(hsize_t start[], hsize_t block[], B_DATATYPE * dataset)
+{
+ B_DATATYPE *dataptr = dataset;
+ hsize_t i, j;
+
+ /* print the column heading */
+ printf("%-8s", "Cols:");
+ for (j=0; j < block[1]; j++){
+ printf("%3lu ", (unsigned long)(start[1]+j));
+ }
+ printf("\n");
+
+ /* print the slab data */
+ for (i=0; i < block[0]; i++){
+ printf("Row %2lu: ", (unsigned long)(i+start[0]));
+ for (j=0; j < block[1]; j++){
+ printf("%llu ", *dataptr++);
+ }
+ printf("\n");
+ }
+}
+
+
+/*
+ * Verify that the dataset read back matches the original written data.
+ */
+static int
+verify_data(hsize_t start[], hsize_t count[], hsize_t stride[], hsize_t block[], B_DATATYPE *dataset, B_DATATYPE *original)
+{
+ hsize_t i, j;
+ int vrfyerrs;
+
+ /* print it if VERBOSE_MED */
+ if(VERBOSE_MED) {
+ printf("verify_data dumping:::\n");
+ printf("start(%lu, %lu), count(%lu, %lu), stride(%lu, %lu), block(%lu, %lu)\n",
+ (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1],
+ (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1]);
+ printf("original values:\n");
+ dataset_print(start, block, original);
+ printf("compared values:\n");
+ dataset_print(start, block, dataset);
+ }
+
+ vrfyerrs = 0;
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(*dataset != *original){
+ if(vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED){
+ printf("Dataset Verify failed at [%lu][%lu](row %lu, col %lu): expect %llu, got %llu\n",
+ (unsigned long)i, (unsigned long)j,
+ (unsigned long)(i+start[0]), (unsigned long)(j+start[1]),
+ *(original), *(dataset));
+ }
+ dataset++;
+ original++;
+ }
+ }
+ }
+ if(vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED)
+ printf("[more errors ...]\n");
+ if(vrfyerrs)
+ printf("%d errors found in verify_data\n", vrfyerrs);
+ return(vrfyerrs);
+}
+
+/* Set up the selection */
+static void
+ccslab_set(int mpi_rank,
+ int mpi_size,
+ hsize_t start[],
+ hsize_t count[],
+ hsize_t stride[],
+ hsize_t block[],
+ int mode)
+{
+
+ switch (mode){
+
+ case BYROW_CONT:
+ /* Each process takes a slab of rows. */
+ block[0] = 1;
+ block[1] = 1;
+ stride[0] = 1;
+ stride[1] = 1;
+ count[0] = space_dim1;
+ count[1] = space_dim2;
+ start[0] = mpi_rank*count[0];
+ start[1] = 0;
+
+ break;
+
+ case BYROW_DISCONT:
+ /* Each process takes several disjoint blocks. */
+ block[0] = 1;
+ block[1] = 1;
+ stride[0] = 3;
+ stride[1] = 3;
+ count[0] = space_dim1/(stride[0]*block[0]);
+ count[1] = (space_dim2)/(stride[1]*block[1]);
+ start[0] = space_dim1*mpi_rank;
+ start[1] = 0;
+
+ break;
+
+ case BYROW_SELECTNONE:
+ /* Each process takes a slab of rows; there are
+ no selections for the last process. */
+ block[0] = 1;
+ block[1] = 1;
+ stride[0] = 1;
+ stride[1] = 1;
+ count[0] = ((mpi_rank >= MAX(1,(mpi_size-2)))?0:space_dim1);
+ count[1] = space_dim2;
+ start[0] = mpi_rank*count[0];
+ start[1] = 0;
+
+ break;
+
+ case BYROW_SELECTUNBALANCE:
+ /* The first one-third of the processes select only
+ the top half of the domain; the rest select the bottom
+ half of the domain. */
+
+ block[0] = 1;
+ count[0] = 2;
+ stride[0] = space_dim1*mpi_size/4+1;
+ block[1] = space_dim2;
+ count[1] = 1;
+ start[1] = 0;
+ stride[1] = 1;
+ if((mpi_rank *3)<(mpi_size*2)) start[0] = mpi_rank;
+ else start[0] = 1 + space_dim1*mpi_size/2 + (mpi_rank-2*mpi_size/3);
+ break;
+
+ case BYROW_SELECTINCHUNK:
+ /* Each process will only select one chunk */
+
+ block[0] = 1;
+ count[0] = 1;
+ start[0] = mpi_rank*space_dim1;
+ stride[0]= 1;
+ block[1] = space_dim2;
+ count[1] = 1;
+ stride[1]= 1;
+ start[1] = 0;
+
+ break;
+
+ default:
+ /* Unknown mode. Set it to cover the whole dataset. */
+ block[0] = space_dim1*mpi_size;
+ block[1] = space_dim2;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = 0;
+
+ break;
+ }
+ if (VERBOSE_MED){
+ printf("start[]=(%lu,%lu), count[]=(%lu,%lu), stride[]=(%lu,%lu), block[]=(%lu,%lu), total datapoints=%lu\n",
+ (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1],
+ (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1],
+ (unsigned long)(block[0]*block[1]*count[0]*count[1]));
+ }
+}
+
+
+/*
+ * Fill the dataset with trivial data for testing.
+ * Assume dimension rank is 2.
+ */
+static void
+ccdataset_fill(hsize_t start[],
+ hsize_t stride[],
+ hsize_t count[],
+ hsize_t block[],
+ DATATYPE * dataset,
+ int mem_selection)
+{
+ DATATYPE *dataptr = dataset;
+ DATATYPE *tmptr;
+ hsize_t i,j,k1,k2,k=0;
+ /* put some trivial data in the data_array */
+ tmptr = dataptr;
+
+ /* assign the disjoint block (two-dimensional) data array value
+ through the pointer */
+
+ for (k1 = 0; k1 < count[0]; k1++) {
+ for(i = 0; i < block[0]; i++) {
+ for(k2 = 0; k2 < count[1]; k2++) {
+ for(j = 0;j < block[1]; j++) {
+
+ if (ALL != mem_selection) {
+ dataptr = tmptr + ((start[0]+k1*stride[0]+i)*space_dim2+
+ start[1]+k2*stride[1]+j);
+ }
+ else {
+ dataptr = tmptr + k;
+ k++;
+ }
+
+ *dataptr = (DATATYPE)(k1+k2+i+j);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Print the first block of the content of the dataset.
+ */
+static void
+ccdataset_print(hsize_t start[],
+ hsize_t block[],
+ DATATYPE * dataset)
+
+{
+ DATATYPE *dataptr = dataset;
+ hsize_t i, j;
+
+ /* print the column heading */
+ printf("Print only the first block of the dataset\n");
+ printf("%-8s", "Cols:");
+ for (j=0; j < block[1]; j++){
+ printf("%3lu ", (unsigned long)(start[1]+j));
+ }
+ printf("\n");
+
+ /* print the slab data */
+ for (i=0; i < block[0]; i++){
+ printf("Row %2lu: ", (unsigned long)(i+start[0]));
+ for (j=0; j < block[1]; j++){
+ printf("%03d ", *dataptr++);
+ }
+ printf("\n");
+ }
+}
+
+/*
+ * Verify the content of the dataset against the original data.
+ */
+static int
+ccdataset_vrfy(hsize_t start[],
+ hsize_t count[],
+ hsize_t stride[],
+ hsize_t block[],
+ DATATYPE *dataset,
+ DATATYPE *original,
+ int mem_selection)
+{
+ hsize_t i, j,k1,k2,k=0;
+ int vrfyerrs;
+ DATATYPE *dataptr,*oriptr;
+
+ /* print it if VERBOSE_MED */
+ if (VERBOSE_MED) {
+ printf("dataset_vrfy dumping:::\n");
+ printf("start(%lu, %lu), count(%lu, %lu), stride(%lu, %lu), block(%lu, %lu)\n",
+ (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1],
+ (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1]);
+ printf("original values:\n");
+ ccdataset_print(start, block, original);
+ printf("compared values:\n");
+ ccdataset_print(start, block, dataset);
+ }
+
+ vrfyerrs = 0;
+
+ for (k1=0;k1<count[0];k1++) {
+ for(i=0;i<block[0];i++) {
+ for(k2=0; k2<count[1];k2++) {
+ for(j=0;j<block[1];j++) {
+ if (ALL != mem_selection) {
+ dataptr = dataset + ((start[0]+k1*stride[0]+i)*space_dim2+
+ start[1]+k2*stride[1]+j);
+ oriptr = original + ((start[0]+k1*stride[0]+i)*space_dim2+
+ start[1]+k2*stride[1]+j);
+ }
+ else {
+ dataptr = dataset + k;
+ oriptr = original + k;
+ k++;
+ }
+ if (*dataptr != *oriptr){
+ if (vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED){
+ printf("Dataset Verify failed at [%lu][%lu]: expect %d, got %d\n",
+ (unsigned long)i, (unsigned long)j,
+ *(oriptr), *(dataptr));
+ }
+ }
+ }
+ }
+ }
+ }
+ if (vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED)
+ printf("[more errors ...]\n");
+ if (vrfyerrs)
+ printf("%d errors found in ccdataset_vrfy\n", vrfyerrs);
+ return(vrfyerrs);
+}
+
+/*
+ * Example of using the parallel HDF5 library to create two datasets
+ * in one HDF5 file with collective parallel access support.
+ * The Datasets are of sizes (number-of-mpi-processes x dim0) x dim1.
+ * Each process controls only a slab of size dim0 x dim1 within each
+ * dataset. [Note: not so yet. Datasets are of sizes dim0xdim1 and
+ * each process controls a hyperslab within.]
+ */
+
+static void
+dataset_big_write(void)
+{
+
+ hid_t xfer_plist; /* Dataset transfer properties list */
+ hid_t sid; /* Dataspace ID */
+ hid_t file_dataspace; /* File dataspace ID */
+ hid_t mem_dataspace; /* memory dataspace ID */
+ hid_t dataset;
+ hid_t datatype; /* Datatype ID */
+ hsize_t dims[RANK]; /* dataset dim sizes */
+ hsize_t start[RANK]; /* for hyperslab setting */
+ hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
+ hsize_t block[RANK]; /* for hyperslab setting */
+ hsize_t *coords = NULL;
+ int i;
+ herr_t ret; /* Generic return value */
+ hid_t fid; /* HDF5 file ID */
+ hid_t acc_tpl; /* File access templates */
+ hsize_t h;
+ size_t num_points;
+ B_DATATYPE * wdata;
+
+
+ /* allocate memory for data buffer */
+ wdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE));
+ VRFY((wdata != NULL), "wdata malloc succeeded");
+
+ /* setup file access template */
+ acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
+ VRFY((acc_tpl >= 0), "H5P_FILE_ACCESS");
+ H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);
+
+ /* create the file collectively */
+ fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl);
+ VRFY((fid >= 0), "H5Fcreate succeeded");
+
+ /* Release file-access template */
+ ret = H5Pclose(acc_tpl);
+ VRFY((ret >= 0), "");
+
+
+ /* Each process takes a slab of rows. */
+ printf("\nTesting Dataset1 write by ROW\n");
+ /* Create a large dataset */
+ dims[0] = bigcount;
+ dims[1] = mpi_size;
+ sid = H5Screate_simple (RANK, dims, NULL);
+ VRFY((sid >= 0), "H5Screate_simple succeeded");
+ dataset = H5Dcreate2(fid, DATASET1, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dcreate2 succeeded");
+ H5Sclose(sid);
+
+ block[0] = dims[0]/mpi_size;
+ block[1] = dims[1];
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = mpi_rank*block[0];
+ start[1] = 0;
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, block, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* fill the local slab with some trivial data */
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ dataset_print(start, block, wdata);
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* write data collectively */
+ MESG("writeAll by Row");
+ {
+ int j,k =0;
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", wdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, wdata);
+ VRFY((ret >= 0), "H5Dwrite dataset1 succeeded");
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+
+
+ /* Each process takes a slab of cols. */
+ printf("\nTesting Dataset2 write by COL\n");
+ /* Create a large dataset */
+ dims[0] = bigcount;
+ dims[1] = mpi_size;
+ sid = H5Screate_simple (RANK, dims, NULL);
+ VRFY((sid >= 0), "H5Screate_simple succeeded");
+ dataset = H5Dcreate2(fid, DATASET2, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dcreate2 succeeded");
+ H5Sclose(sid);
+
+ block[0] = dims[0];
+ block[1] = dims[1]/mpi_size;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = mpi_rank*block[1];
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, block, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* fill the local slab with some trivial data */
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ dataset_print(start, block, wdata);
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* write data collectively */
+ MESG("writeAll by Col");
+ {
+ int j,k =0;
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", wdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, wdata);
+ VRFY((ret >= 0), "H5Dwrite dataset1 succeeded");
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+
+
+ /* ALL selection */
+ printf("\nTesting Dataset3 write select ALL proc 0, NONE others\n");
+ /* Create a large dataset */
+ dims[0] = bigcount;
+ dims[1] = 1;
+ sid = H5Screate_simple (RANK, dims, NULL);
+ VRFY((sid >= 0), "H5Screate_simple succeeded");
+ dataset = H5Dcreate2(fid, DATASET3, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dcreate2 succeeded");
+ H5Sclose(sid);
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ if(MAINPROCESS) {
+ ret = H5Sselect_all(file_dataspace);
+ VRFY((ret >= 0), "H5Sset_all succeeded");
+ }
+ else {
+ ret = H5Sselect_none(file_dataspace);
+ VRFY((ret >= 0), "H5Sset_none succeeded");
+ }
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, dims, NULL);
+ VRFY((mem_dataspace >= 0), "");
+ if(!MAINPROCESS) {
+ ret = H5Sselect_none(mem_dataspace);
+ VRFY((ret >= 0), "H5Sset_none succeeded");
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* fill the local slab with some trivial data */
+ fill_datasets(start, dims, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+ /* write data collectively */
+ MESG("writeAll by process 0");
+ {
+ int j,k =0;
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", wdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, wdata);
+ VRFY((ret >= 0), "H5Dwrite dataset1 succeeded");
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+ /* Point selection */
+ printf("\nTesting Dataset4 write point selection\n");
+ /* Create a large dataset */
+ dims[0] = bigcount;
+ dims[1] = mpi_size * 4;
+ sid = H5Screate_simple (RANK, dims, NULL);
+ VRFY((sid >= 0), "H5Screate_simple succeeded");
+ dataset = H5Dcreate2(fid, DATASET4, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dcreate2 succeeded");
+ H5Sclose(sid);
+
+ block[0] = dims[0]/2;
+ block[1] = 2;
+ stride[0] = dims[0]/2;
+ stride[1] = 2;
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = dims[1]/mpi_size * mpi_rank;
+
+ num_points = bigcount;
+
+ coords = (hsize_t *)malloc(num_points * RANK * sizeof(hsize_t));
+ VRFY((coords != NULL), "coords malloc succeeded");
+
+ set_coords (start, count, stride, block, num_points, coords, IN_ORDER);
+ /* create a file dataspace */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((ret >= 0), "H5Sselect_elements succeeded");
+
+ if(coords) free(coords);
+
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ dataset_print(start, block, wdata);
+ }
+
+ /* create a memory dataspace */
+ mem_dataspace = H5Screate_simple (1, &bigcount, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, wdata);
+ VRFY((ret >= 0), "H5Dwrite dataset1 succeeded");
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+ /* Irregular selection */
+ /* Need larger memory for data buffer */
+ free(wdata);
+#if 0
+ wdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE));
+ VRFY((wdata != NULL), "wdata malloc succeeded");
+
+ printf("\nTesting Dataset5 write irregular selection\n");
+ /* Create a large dataset */
+ dims[0] = bigcount/6;
+ dims[1] = mpi_size * 4;
+ sid = H5Screate_simple (RANK, dims, NULL);
+ VRFY((sid >= 0), "H5Screate_simple succeeded");
+ dataset = H5Dcreate2(fid, DATASET5, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dcreate2 succeeded");
+ H5Sclose(sid);
+
+ /* first select 1 col in this procs splice */
+ block[0] = dims[0];
+ block[1] = 1;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = mpi_rank * 4;
+
+ /* create a file dataspace */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+
+ dims[1] = 4;
+ /* create a memory dataspace */
+ mem_dataspace = H5Screate_simple (RANK, dims, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ start[1] = 0;
+ ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* select every other row in the process splice and OR it with
+ the col selection to create an irregular selection */
+ for(h=0 ; h<dims[0] ; h+=2) {
+ block[0] = 1;
+ block[1] = 4;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = h;
+ start[1] = mpi_rank * 4;
+
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_OR, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ start[1] = 0;
+ ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_OR, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+ }
+ printf("Setting up for collective transfer\n");
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* fill the local slab with some trivial data */
+ fill_datasets(start, dims, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+ ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, wdata);
+ VRFY((ret >= 0), "H5Dwrite dataset1 succeeded");
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+ free(wdata);
+#endif
+ H5Fclose(fid);
+}
+
+/*
+ * Example of using the parallel HDF5 library to read two datasets
+ * in one HDF5 file with collective parallel access support.
+ * The Datasets are of sizes (number-of-mpi-processes x dim0) x dim1.
+ * Each process controls only a slab of size dim0 x dim1 within each
+ * dataset. [Note: not so yet. Datasets are of sizes dim0xdim1 and
+ * each process controls a hyperslab within.]
+ */
+
+static void
+dataset_big_read(void)
+{
+ hid_t fid; /* HDF5 file ID */
+ hid_t acc_tpl; /* File access templates */
+ hid_t xfer_plist; /* Dataset transfer properties list */
+ hid_t file_dataspace; /* File dataspace ID */
+ hid_t mem_dataspace; /* memory dataspace ID */
+ hid_t dataset;
+ B_DATATYPE *rdata = NULL; /* data buffer */
+ B_DATATYPE *wdata = NULL; /* expected data buffer */
+ hsize_t dims[RANK]; /* dataset dim sizes */
+ hsize_t start[RANK]; /* for hyperslab setting */
+ hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
+ hsize_t block[RANK]; /* for hyperslab setting */
+ int i,j,k;
+ hsize_t h;
+ size_t num_points;
+ hsize_t *coords = NULL;
+ herr_t ret; /* Generic return value */
+
+ /* allocate memory for data buffer */
+ rdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE));
+ VRFY((rdata != NULL), "rdata malloc succeeded");
+ wdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE));
+ VRFY((wdata != NULL), "wdata malloc succeeded");
+
+ memset(rdata, 0, bigcount*sizeof(B_DATATYPE));
+
+ /* setup file access template */
+ acc_tpl = H5Pcreate (H5P_FILE_ACCESS);
+ VRFY((acc_tpl >= 0), "H5P_FILE_ACCESS");
+ H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL);
+
+ /* open the file collectively */
+ fid=H5Fopen(filename,H5F_ACC_RDONLY,acc_tpl);
+ VRFY((fid >= 0), "H5Fopen succeeded");
+
+ /* Release file-access template */
+ ret = H5Pclose(acc_tpl);
+ VRFY((ret >= 0), "");
+
+
+ printf("\nRead Testing Dataset1 by COL\n");
+ dataset = H5Dopen2(fid, DATASET1, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dopen2 succeeded");
+
+ dims[0] = bigcount;
+ dims[1] = mpi_size;
+ /* Each process takes a slab of cols. */
+ block[0] = dims[0];
+ block[1] = dims[1]/mpi_size;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = mpi_rank*block[1];
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, block, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* fill dataset with test data */
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pcreate xfer succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* read data collectively */
+ ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, rdata);
+ VRFY((ret >= 0), "H5Dread dataset1 succeeded");
+
+ {
+ k = 0;
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", rdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ /* verify the read data with original expected data */
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+
+ printf("\nRead Testing Dataset2 by ROW\n");
+ memset(rdata, 0, bigcount*sizeof(B_DATATYPE));
+ dataset = H5Dopen2(fid, DATASET2, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dopen2 succeeded");
+
+ dims[0] = bigcount;
+ dims[1] = mpi_size;
+ /* Each process takes a slab of rows. */
+ block[0] = dims[0]/mpi_size;
+ block[1] = dims[1];
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = mpi_rank*block[0];
+ start[1] = 0;
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, block, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* fill dataset with test data */
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pcreate xfer succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* read data collectively */
+ ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, rdata);
+ VRFY((ret >= 0), "H5Dread dataset2 succeeded");
+
+ {
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", rdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ /* verify the read data with original expected data */
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+ ret = H5Dclose(dataset);
+    VRFY((ret >= 0), "H5Dclose2 succeeded");
+
+
+ printf("\nRead Testing Dataset3 read select ALL proc 0, NONE others\n");
+ memset(rdata, 0, bigcount*sizeof(B_DATATYPE));
+ dataset = H5Dopen2(fid, DATASET3, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dopen2 succeeded");
+
+ dims[0] = bigcount;
+ dims[1] = 1;
+
+ /* create a file dataspace independently */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ if(MAINPROCESS) {
+ ret = H5Sselect_all(file_dataspace);
+        VRFY((ret >= 0), "H5Sselect_all succeeded");
+ }
+ else {
+ ret = H5Sselect_none(file_dataspace);
+        VRFY((ret >= 0), "H5Sselect_none succeeded");
+ }
+
+ /* create a memory dataspace independently */
+ mem_dataspace = H5Screate_simple (RANK, dims, NULL);
+ VRFY((mem_dataspace >= 0), "");
+ if(!MAINPROCESS) {
+ ret = H5Sselect_none(mem_dataspace);
+        VRFY((ret >= 0), "H5Sselect_none succeeded");
+ }
+
+ /* fill dataset with test data */
+ fill_datasets(start, dims, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* read data collectively */
+ ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, rdata);
+ VRFY((ret >= 0), "H5Dread dataset3 succeeded");
+
+ {
+ for (i=0; i < block[0]; i++){
+ for (j=0; j < block[1]; j++){
+ if(k < 10) {
+ printf("%lld ", rdata[k]);
+ k++;
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ if(MAINPROCESS) {
+ /* verify the read data with original expected data */
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+ }
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+ ret = H5Dclose(dataset);
+    VRFY((ret >= 0), "H5Dclose3 succeeded");
+
+ printf("\nRead Testing Dataset4 with Point selection\n");
+ dataset = H5Dopen2(fid, DATASET4, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dopen2 succeeded");
+
+ dims[0] = bigcount;
+ dims[1] = mpi_size * 4;
+
+ block[0] = dims[0]/2;
+ block[1] = 2;
+ stride[0] = dims[0]/2;
+ stride[1] = 2;
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = dims[1]/mpi_size * mpi_rank;
+
+ fill_datasets(start, block, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ dataset_print(start, block, wdata);
+ }
+
+ num_points = bigcount;
+
+ coords = (hsize_t *)malloc(num_points * RANK * sizeof(hsize_t));
+ VRFY((coords != NULL), "coords malloc succeeded");
+
+ set_coords (start, count, stride, block, num_points, coords, IN_ORDER);
+ /* create a file dataspace */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+ ret = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((ret >= 0), "H5Sselect_elements succeeded");
+
+ if(coords) free(coords);
+
+ /* create a memory dataspace */
+ mem_dataspace = H5Screate_simple (1, &bigcount, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* read data collectively */
+ ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, rdata);
+    VRFY((ret >= 0), "H5Dread dataset4 succeeded");
+
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+ ret = H5Dclose(dataset);
+    VRFY((ret >= 0), "H5Dclose4 succeeded");
+
+ printf("\nRead Testing Dataset5 with Irregular selection\n");
+ /* Need larger memory for data buffer */
+ free(wdata);
+ free(rdata);
+#if 0
+ wdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE));
+ VRFY((wdata != NULL), "wdata malloc succeeded");
+ rdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE));
+ VRFY((rdata != NULL), "rdata malloc succeeded");
+
+ dataset = H5Dopen2(fid, DATASET5, H5P_DEFAULT);
+ VRFY((dataset >= 0), "H5Dopen2 succeeded");
+
+ dims[0] = bigcount;
+ dims[1] = mpi_size * 4;
+
+ /* first select 1 col in this proc splice */
+ block[0] = dims[0];
+ block[1] = 1;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = mpi_rank * 4;
+
+ /* get file dataspace */
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "H5Dget_space succeeded");
+
+ /* create a memory dataspace */
+ dims[1] = 4;
+ mem_dataspace = H5Screate_simple (RANK, dims, NULL);
+ VRFY((mem_dataspace >= 0), "");
+
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ start[1] = 0;
+ ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ /* select every other row in the process splice and OR it with
+ the col selection to create an irregular selection */
+ for(h=0 ; h<dims[0] ; h+=2) {
+ block[0] = 1;
+ block[1] = 4;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = h;
+ start[1] = mpi_rank * 4;
+
+ ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_OR, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ start[1] = 0;
+ ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_OR, start, stride, count, block);
+ VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
+
+ //fprintf(stderr, "%d: %d - %d\n", mpi_rank, (int)h, (int)H5Sget_select_npoints(mem_dataspace));
+ }
+
+ /* set up the collective transfer properties list */
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+ ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((ret >= 0), "H5Pcreate xfer succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((ret>= 0),"set independent IO collectively succeeded");
+ }
+
+ /* read data collectively */
+ ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace,
+ xfer_plist, rdata);
+ VRFY((ret >= 0), "H5Dread dataset1 succeeded");
+
+ /* fill dataset with test data */
+ fill_datasets(start, dims, wdata);
+ MESG("data_array initialized");
+ if(VERBOSE_MED){
+ MESG("data_array created");
+ }
+
+
+
+ /* verify the read data with original expected data */
+ block[0] = dims[0];
+ block[1] = 1;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = 0;
+ start[1] = 0;
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+
+ for(h=0 ; h<dims[0] ; h+=2) {
+ block[0] = 1;
+ block[1] = 4;
+ stride[0] = block[0];
+ stride[1] = block[1];
+ count[0] = 1;
+ count[1] = 1;
+ start[0] = h;
+ start[1] = 0;
+ ret = verify_data(start, count, stride, block, rdata, wdata);
+ if(ret) {fprintf(stderr, "verify failed\n"); exit(1);}
+ }
+
+ /* release all temporary handles. */
+ H5Sclose(file_dataspace);
+ H5Sclose(mem_dataspace);
+ H5Pclose(xfer_plist);
+ ret = H5Dclose(dataset);
+ VRFY((ret >= 0), "H5Dclose1 succeeded");
+
+    /* release data buffers */
+    if(rdata) free(rdata);
+    if(wdata) free(wdata);
+#endif
+
+    /* Close the file even when the Dataset5 block above is compiled out */
+    H5Fclose(fid);
+} /* dataset_large_readAll */
+
+
+/*
+ * Create the appropriate File access property list
+ */
+hid_t
+create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type)
+{
+ hid_t ret_pl = -1;
+ herr_t ret; /* generic return value */
+ int mpi_rank; /* mpi variables */
+
+ /* need the rank for error checking macros */
+ MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+ ret_pl = H5Pcreate (H5P_FILE_ACCESS);
+ VRFY((ret_pl >= 0), "H5P_FILE_ACCESS");
+
+ if (l_facc_type == FACC_DEFAULT)
+ return (ret_pl);
+
+ if (l_facc_type == FACC_MPIO){
+ /* set Parallel access with communicator */
+ ret = H5Pset_fapl_mpio(ret_pl, comm, info);
+ VRFY((ret >= 0), "");
+ ret = H5Pset_all_coll_metadata_ops(ret_pl, TRUE);
+ VRFY((ret >= 0), "");
+ ret = H5Pset_coll_metadata_write(ret_pl, TRUE);
+ VRFY((ret >= 0), "");
+ return(ret_pl);
+ }
+
+ if (l_facc_type == (FACC_MPIO | FACC_SPLIT)){
+ hid_t mpio_pl;
+
+ mpio_pl = H5Pcreate (H5P_FILE_ACCESS);
+ VRFY((mpio_pl >= 0), "");
+ /* set Parallel access with communicator */
+ ret = H5Pset_fapl_mpio(mpio_pl, comm, info);
+ VRFY((ret >= 0), "");
+
+ /* setup file access template */
+ ret_pl = H5Pcreate (H5P_FILE_ACCESS);
+ VRFY((ret_pl >= 0), "");
+ /* set Parallel access with communicator */
+ ret = H5Pset_fapl_split(ret_pl, ".meta", mpio_pl, ".raw", mpio_pl);
+ VRFY((ret >= 0), "H5Pset_fapl_split succeeded");
+ H5Pclose(mpio_pl);
+ return(ret_pl);
+ }
+
+ /* unknown file access types */
+ return (ret_pl);
+}
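+
+/*
+ * Usage sketch -- this simply mirrors what coll_chunktest() does further
+ * below, it is not an additional code path:
+ *
+ *     hid_t fapl = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, FACC_MPIO);
+ *     hid_t file = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
+ *     VRFY((file >= 0), "H5Fcreate succeeded");
+ *     H5Pclose(fapl);
+ */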
+
+
+/*-------------------------------------------------------------------------
+ * Function: coll_chunk1
+ *
+ * Purpose:     Wrapper to test the collective chunk IO for regular JOINT
+ *              selection with a single chunk
+ *
+ * Return: Success: 0
+ *
+ * Failure: -1
+ *
+ * Programmer: Unknown
+ * July 12th, 2004
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* ------------------------------------------------------------------------
+ * Descriptions for the selection: One big singular selection inside one chunk
+ * Two dimensions,
+ *
+ * dim1 = space_dim1(5760)*mpi_size
+ * dim2 = space_dim2(3)
+ * chunk_dim1 = dim1
+ * chunk_dim2 = dim2
+ * block = 1 for all dimensions
+ * stride = 1 for all dimensions
+ * count0 = space_dim1(5760)
+ * count1 = space_dim2(3)
+ * start0 = mpi_rank*space_dim1
+ * start1 = 0
+ * ------------------------------------------------------------------------
+ */
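+
+/* Worked example (illustration only, assuming mpi_size = 2 and the figures
+ * quoted above): dim1 = 5760*2 = 11520, dim2 = 3, and the single chunk spans
+ * the whole 11520 x 3 dataset.  With block = stride = 1, rank 0 selects rows
+ * 0..5759 and rank 1 rows 5760..11519, each across all 3 columns, i.e.
+ * 5760*3 = 17280 elements per rank in one contiguous (JOINT) region.
+ */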
+
+void
+coll_chunk1(void)
+{
+ if (MAINPROCESS)
+ printf("coll_chunk1\n");
+
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, HYPER, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, ALL, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, HYPER, OUT_OF_ORDER);
+
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, ALL, IN_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, POINT, IN_ORDER);
+ coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, HYPER, IN_ORDER);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: coll_chunk2
+ *
+ * Purpose:     Wrapper to test the collective chunk IO for regular DISJOINT
+ *              selection with a single chunk
+ *
+ * Return: Success: 0
+ *
+ * Failure: -1
+ *
+ * Programmer: Unknown
+ * July 12th, 2004
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+ /* ------------------------------------------------------------------------
+ * Descriptions for the selection: many disjoint selections inside one chunk
+ * Two dimensions,
+ *
+ * dim1 = space_dim1(5760)*mpi_size
+ * dim2 = space_dim2(3)
+ * chunk_dim1 = dim1
+ * chunk_dim2 = dim2
+ * block = 1 for all dimensions
+ * stride = 3 for all dimensions
+ * count0 = space_dim1/stride0(5760/3)
+ * count1 = space_dim2/stride(3/3 = 1)
+ * start0 = mpi_rank*space_dim1
+ * start1 = 0
+ *
+ * ------------------------------------------------------------------------
+ */
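+
+/* Worked example (illustration only, assuming mpi_size = 2 and the figures
+ * quoted above): with stride = 3 and block = 1 in both dimensions, rank 0
+ * starts at (0,0) and rank 1 at (5760,0); each rank selects count0 = 5760/3
+ * = 1920 rows (every third one) in a single column, so the per-rank
+ * selections form many small disjoint pieces inside the one big chunk.
+ */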
+void
+coll_chunk2(void)
+{
+ if (MAINPROCESS)
+ printf("coll_chunk2\n");
+
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, HYPER, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, ALL, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, HYPER, OUT_OF_ORDER);
+
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, ALL, IN_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, POINT, IN_ORDER);
+ coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, HYPER, IN_ORDER);
+}
+
+
+/*-------------------------------------------------------------------------
+ * Function: coll_chunk3
+ *
+ * Purpose:     Wrapper to test the collective chunk IO for regular JOINT
+ *              selection with at least 2*mpi_size chunks
+ *
+ * Return: Success: 0
+ *
+ * Failure: -1
+ *
+ * Programmer: Unknown
+ * July 12th, 2004
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* ------------------------------------------------------------------------
+ * Descriptions for the selection: one singular selection across many chunks
+ * Two dimensions, Num of chunks = 2* mpi_size
+ *
+ * dim1 = space_dim1*mpi_size
+ * dim2 = space_dim2(3)
+ * chunk_dim1 = space_dim1
+ * chunk_dim2 = dim2/2
+ * block = 1 for all dimensions
+ * stride = 1 for all dimensions
+ * count0 = space_dim1
+ * count1 = space_dim2(3)
+ * start0 = mpi_rank*space_dim1
+ * start1 = 0
+ *
+ * ------------------------------------------------------------------------
+ */
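+
+/* Worked example (illustration only, assuming mpi_size = 2 and the figures
+ * quoted above): dim1 = 11520 while chunk_dim1 = space_dim1 = 5760 and
+ * chunk_dim2 < dim2, so the dataset is split into multiple chunks.  Each
+ * rank still selects one contiguous 5760 x 3 block of rows, so a single
+ * selection now crosses several chunks rather than staying inside one.
+ */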
+
+void
+coll_chunk3(void)
+{
+ if (MAINPROCESS)
+ printf("coll_chunk3\n");
+
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, HYPER, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, ALL, OUT_OF_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, POINT, OUT_OF_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, HYPER, OUT_OF_ORDER);
+
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, ALL, IN_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, POINT, IN_ORDER);
+ coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, HYPER, IN_ORDER);
+}
+
+
+/* Borrowed/Modified (slightly) from t_coll_chunk.c */
+/*-------------------------------------------------------------------------
+ * Function: coll_chunktest
+ *
+ * Purpose:     The real testing routine for regular selection of collective
+ *              chunking storage, testing both write and read.
+ *              If anything fails, it may be the read or the write; there is
+ *              no separate test for read and write.
+ *
+ * Return: Success: 0
+ *
+ * Failure: -1
+ *
+ * Modifications:
+ *    Remove invalid temporary property checks for the API_LINK_HARD and
+ *    API_LINK_TRUE cases.
+ * Programmer: Jonathan Kim
+ * Date: 2012-10-10
+ *
+ * Programmer: Unknown
+ * July 12th, 2004
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
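+
+/* Outline of coll_chunktest() below (a summary of the existing routine, not
+ * extra test logic): create the file with a chunked dataset, apply the
+ * hyperslab/point/all selection requested by file_selection and
+ * mem_selection, write collectively, then reopen the file, read the data
+ * back with the same selections and verify it against a freshly filled
+ * reference buffer.
+ */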
+
+static void
+coll_chunktest(const char* filename,
+ int chunk_factor,
+ int select_factor,
+ int api_option,
+ int file_selection,
+ int mem_selection,
+ int mode)
+{
+ hid_t file, dataset, file_dataspace, mem_dataspace;
+ hid_t acc_plist,xfer_plist,crp_plist;
+
+ hsize_t dims[RANK], chunk_dims[RANK];
+ int* data_array1 = NULL;
+ int* data_origin1 = NULL;
+
+ hsize_t start[RANK],count[RANK],stride[RANK],block[RANK];
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ unsigned prop_value;
+#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
+
+ herr_t status;
+ MPI_Comm comm = MPI_COMM_WORLD;
+ MPI_Info info = MPI_INFO_NULL;
+
+ size_t num_points; /* for point selection */
+ hsize_t *coords = NULL; /* for point selection */
+ hsize_t current_dims; /* for point selection */
+ int i;
+
+ /* Create the data space */
+
+ acc_plist = create_faccess_plist(comm,info,facc_type);
+ VRFY((acc_plist >= 0),"");
+
+ file = H5Fcreate(filename,H5F_ACC_TRUNC,H5P_DEFAULT,acc_plist);
+ VRFY((file >= 0),"H5Fcreate succeeded");
+
+ status = H5Pclose(acc_plist);
+ VRFY((status >= 0),"");
+
+ /* setup dimensionality object */
+ dims[0] = space_dim1*mpi_size;
+ dims[1] = space_dim2;
+
+ /* allocate memory for data buffer */
+ data_array1 = (int *)HDmalloc(dims[0] * dims[1] * sizeof(int));
+ VRFY((data_array1 != NULL), "data_array1 malloc succeeded");
+
+ /* set up dimensions of the slab this process accesses */
+ ccslab_set(mpi_rank, mpi_size, start, count, stride, block, select_factor);
+
+ /* set up the coords array selection */
+ num_points = block[0] * block[1] * count[0] * count[1];
+ coords = (hsize_t *)HDmalloc(num_points * RANK * sizeof(hsize_t));
+ VRFY((coords != NULL), "coords malloc succeeded");
+ point_set(start, count, stride, block, num_points, coords, mode);
+
+ file_dataspace = H5Screate_simple(2, dims, NULL);
+    VRFY((file_dataspace >= 0), "file dataspace creation succeeded");
+
+ if(ALL != mem_selection) {
+ mem_dataspace = H5Screate_simple(2, dims, NULL);
+        VRFY((mem_dataspace >= 0), "mem dataspace creation succeeded");
+ }
+ else {
+ current_dims = num_points;
+ mem_dataspace = H5Screate_simple (1, &current_dims, NULL);
+ VRFY((mem_dataspace >= 0), "mem_dataspace create succeeded");
+ }
+
+ crp_plist = H5Pcreate(H5P_DATASET_CREATE);
+ VRFY((crp_plist >= 0),"");
+
+ /* Set up chunk information. */
+ chunk_dims[0] = dims[0]/chunk_factor;
+
+    /* To decrease the testing time, maintain a bigger chunk size */
+ (chunk_factor == 1) ? (chunk_dims[1] = space_dim2) : (chunk_dims[1] = space_dim2/2);
+ status = H5Pset_chunk(crp_plist, 2, chunk_dims);
+ VRFY((status >= 0),"chunk creation property list succeeded");
+
+ dataset = H5Dcreate2(file, DSET_COLLECTIVE_CHUNK_NAME, H5T_NATIVE_INT,
+ file_dataspace, H5P_DEFAULT, crp_plist, H5P_DEFAULT);
+    VRFY((dataset >= 0),"dataset creation succeeded");
+
+ status = H5Pclose(crp_plist);
+ VRFY((status >= 0), "");
+
+ /*put some trivial data in the data array */
+ ccdataset_fill(start, stride, count,block, data_array1, mem_selection);
+
+ MESG("data_array initialized");
+
+ switch (file_selection) {
+ case HYPER:
+ status = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((status >= 0),"hyperslab selection succeeded");
+ break;
+
+ case POINT:
+ if (num_points) {
+ status = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((status >= 0),"Element selection succeeded");
+ }
+ else {
+ status = H5Sselect_none(file_dataspace);
+ VRFY((status >= 0),"none selection succeeded");
+ }
+ break;
+
+ case ALL:
+ status = H5Sselect_all(file_dataspace);
+ VRFY((status >= 0), "H5Sselect_all succeeded");
+ break;
+ }
+
+ switch (mem_selection) {
+ case HYPER:
+ status = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((status >= 0),"hyperslab selection succeeded");
+ break;
+
+ case POINT:
+ if (num_points) {
+ status = H5Sselect_elements(mem_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((status >= 0),"Element selection succeeded");
+ }
+ else {
+ status = H5Sselect_none(mem_dataspace);
+ VRFY((status >= 0),"none selection succeeded");
+ }
+ break;
+
+ case ALL:
+ status = H5Sselect_all(mem_dataspace);
+ VRFY((status >= 0), "H5Sselect_all succeeded");
+ break;
+ }
+
+ /* set up the collective transfer property list */
+ xfer_plist = H5Pcreate(H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0), "");
+
+ status = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((status>= 0),"MPIO collective transfer property succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ status = H5Pset_dxpl_mpio_collective_opt(xfer_plist, H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((status>= 0),"set independent IO collectively succeeded");
+ }
+
+ switch(api_option){
+ case API_LINK_HARD:
+ status = H5Pset_dxpl_mpio_chunk_opt(xfer_plist,H5FD_MPIO_CHUNK_ONE_IO);
+ VRFY((status>= 0),"collective chunk optimization succeeded");
+ break;
+
+ case API_MULTI_HARD:
+ status = H5Pset_dxpl_mpio_chunk_opt(xfer_plist,H5FD_MPIO_CHUNK_MULTI_IO);
+ VRFY((status>= 0),"collective chunk optimization succeeded ");
+ break;
+
+ case API_LINK_TRUE:
+ status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,2);
+ VRFY((status>= 0),"collective chunk optimization set chunk number succeeded");
+ break;
+
+ case API_LINK_FALSE:
+ status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,6);
+ VRFY((status>= 0),"collective chunk optimization set chunk number succeeded");
+ break;
+
+ case API_MULTI_COLL:
+ status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,8);/* make sure it is using multi-chunk IO */
+ VRFY((status>= 0),"collective chunk optimization set chunk number succeeded");
+ status = H5Pset_dxpl_mpio_chunk_opt_ratio(xfer_plist,50);
+ VRFY((status>= 0),"collective chunk optimization set chunk ratio succeeded");
+ break;
+
+ case API_MULTI_IND:
+ status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,8);/* make sure it is using multi-chunk IO */
+ VRFY((status>= 0),"collective chunk optimization set chunk number succeeded");
+ status = H5Pset_dxpl_mpio_chunk_opt_ratio(xfer_plist,100);
+ VRFY((status>= 0),"collective chunk optimization set chunk ratio succeeded");
+ break;
+
+ default:
+ ;
+ }
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if(facc_type == FACC_MPIO) {
+ switch(api_option) {
+ case API_LINK_HARD:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_HARD_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ case API_MULTI_HARD:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ case API_LINK_TRUE:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ case API_LINK_FALSE:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ case API_MULTI_COLL:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ case API_MULTI_IND:
+ prop_value = H5D_XFER_COLL_CHUNK_DEF;
+ status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ VRFY((status >= 0),"testing property list inserted succeeded");
+ break;
+
+ default:
+ ;
+ }
+ }
+#endif
+
+ /* write data collectively */
+ status = H5Dwrite(dataset, H5T_NATIVE_INT, mem_dataspace, file_dataspace,
+ xfer_plist, data_array1);
+ VRFY((status >= 0),"dataset write succeeded");
+
+#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
+ if(facc_type == FACC_MPIO) {
+ switch(api_option){
+ case API_LINK_HARD:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_HARD_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO directly succeeded");
+ break;
+
+ case API_MULTI_HARD:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set MULTI-CHUNK COLLECTIVE IO optimization succeeded");
+ break;
+
+ case API_LINK_TRUE:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO succeeded");
+ break;
+
+ case API_LINK_FALSE:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set LINK IO transferring to multi-chunk IO succeeded");
+ break;
+
+ case API_MULTI_COLL:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set MULTI-CHUNK COLLECTIVE IO with optimization succeeded");
+ break;
+
+ case API_MULTI_IND:
+ status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME,&prop_value);
+ VRFY((status >= 0),"testing property list get succeeded");
+ VRFY((prop_value == 0),"API to set MULTI-CHUNK IO transferring to independent IO succeeded");
+ break;
+
+ default:
+ ;
+ }
+ }
+#endif
+
+ status = H5Dclose(dataset);
+ VRFY((status >= 0),"");
+
+ status = H5Pclose(xfer_plist);
+ VRFY((status >= 0),"property list closed");
+
+ status = H5Sclose(file_dataspace);
+ VRFY((status >= 0),"");
+
+ status = H5Sclose(mem_dataspace);
+ VRFY((status >= 0),"");
+
+
+ status = H5Fclose(file);
+ VRFY((status >= 0),"");
+
+ if (data_array1) HDfree(data_array1);
+
+ /* Use collective read to verify the correctness of collective write. */
+
+ /* allocate memory for data buffer */
+ data_array1 = (int *)HDmalloc(dims[0]*dims[1]*sizeof(int));
+ VRFY((data_array1 != NULL), "data_array1 malloc succeeded");
+
+ /* allocate memory for data buffer */
+ data_origin1 = (int *)HDmalloc(dims[0]*dims[1]*sizeof(int));
+ VRFY((data_origin1 != NULL), "data_origin1 malloc succeeded");
+
+ acc_plist = create_faccess_plist(comm, info, facc_type);
+ VRFY((acc_plist >= 0),"MPIO creation property list succeeded");
+
+ file = H5Fopen(filename,H5F_ACC_RDONLY,acc_plist);
+    VRFY((file >= 0),"H5Fopen succeeded");
+
+ status = H5Pclose(acc_plist);
+ VRFY((status >= 0),"");
+
+ /* open the collective dataset*/
+ dataset = H5Dopen2(file, DSET_COLLECTIVE_CHUNK_NAME, H5P_DEFAULT);
+ VRFY((dataset >= 0), "");
+
+ /* set up dimensions of the slab this process accesses */
+ ccslab_set(mpi_rank, mpi_size, start, count, stride, block, select_factor);
+
+ /* obtain the file and mem dataspace*/
+ file_dataspace = H5Dget_space (dataset);
+ VRFY((file_dataspace >= 0), "");
+
+ if (ALL != mem_selection) {
+ mem_dataspace = H5Dget_space (dataset);
+ VRFY((mem_dataspace >= 0), "");
+ }
+ else {
+ current_dims = num_points;
+ mem_dataspace = H5Screate_simple (1, &current_dims, NULL);
+ VRFY((mem_dataspace >= 0), "mem_dataspace create succeeded");
+ }
+
+ switch (file_selection) {
+ case HYPER:
+ status = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((status >= 0),"hyperslab selection succeeded");
+ break;
+
+ case POINT:
+ if (num_points) {
+ status = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((status >= 0),"Element selection succeeded");
+ }
+ else {
+ status = H5Sselect_none(file_dataspace);
+ VRFY((status >= 0),"none selection succeeded");
+ }
+ break;
+
+ case ALL:
+ status = H5Sselect_all(file_dataspace);
+ VRFY((status >= 0), "H5Sselect_all succeeded");
+ break;
+ }
+
+ switch (mem_selection) {
+ case HYPER:
+ status = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block);
+ VRFY((status >= 0),"hyperslab selection succeeded");
+ break;
+
+ case POINT:
+ if (num_points) {
+ status = H5Sselect_elements(mem_dataspace, H5S_SELECT_SET, num_points, coords);
+ VRFY((status >= 0),"Element selection succeeded");
+ }
+ else {
+ status = H5Sselect_none(mem_dataspace);
+ VRFY((status >= 0),"none selection succeeded");
+ }
+ break;
+
+ case ALL:
+ status = H5Sselect_all(mem_dataspace);
+ VRFY((status >= 0), "H5Sselect_all succeeded");
+ break;
+ }
+
+ /* fill dataset with test data */
+ ccdataset_fill(start, stride,count,block, data_origin1, mem_selection);
+ xfer_plist = H5Pcreate (H5P_DATASET_XFER);
+ VRFY((xfer_plist >= 0),"");
+
+ status = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE);
+ VRFY((status>= 0),"MPIO collective transfer property succeeded");
+ if(dxfer_coll_type == DXFER_INDEPENDENT_IO) {
+ status = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO);
+ VRFY((status>= 0),"set independent IO collectively succeeded");
+ }
+
+ status = H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace, file_dataspace,
+ xfer_plist, data_array1);
+ VRFY((status >=0),"dataset read succeeded");
+
+ /* verify the read data with original expected data */
+ status = ccdataset_vrfy(start, count, stride, block, data_array1, data_origin1, mem_selection);
+ if (status) nerrors++;
+
+ status = H5Pclose(xfer_plist);
+ VRFY((status >= 0),"property list closed");
+
+ /* close dataset collectively */
+ status=H5Dclose(dataset);
+ VRFY((status >= 0), "H5Dclose");
+
+ /* release all IDs created */
+ status = H5Sclose(file_dataspace);
+ VRFY((status >= 0),"H5Sclose");
+
+ status = H5Sclose(mem_dataspace);
+ VRFY((status >= 0),"H5Sclose");
+
+ /* close the file collectively */
+ status = H5Fclose(file);
+ VRFY((status >= 0),"H5Fclose");
+
+ /* release data buffers */
+ if(coords) HDfree(coords);
+ if(data_array1) HDfree(data_array1);
+ if(data_origin1) HDfree(data_origin1);
+
+}
+
+
+
+/*****************************************************************************
+ *
+ * Function: do_express_test()
+ *
+ * Purpose: Do an MPI_Allreduce to obtain the maximum value returned
+ * by GetTestExpress() across all processes. Return this
+ * value.
+ *
+ *              Environment variables can be different across different
+ *              processes. This function ensures that all processes agree
+ *              on whether to do an express test.
+ *
+ * Return: Success: Maximum of the values returned by
+ * GetTestExpress() across all processes.
+ *
+ * Failure: -1
+ *
+ * Programmer: JRM -- 4/25/06
+ *
+ *****************************************************************************/
+static int
+do_express_test(int world_mpi_rank)
+{
+ int express_test;
+ int max_express_test;
+ int result;
+
+ express_test = GetTestExpress();
+
+ result = MPI_Allreduce((void *)&express_test,
+ (void *)&max_express_test,
+ 1,
+ MPI_INT,
+ MPI_MAX,
+ MPI_COMM_WORLD);
+
+ if ( result != MPI_SUCCESS ) {
+ nerrors++;
+ max_express_test = -1;
+ if ( VERBOSE_MED && (world_mpi_rank == 0)) {
+ HDfprintf(stdout, "%d:%s: MPI_Allreduce() failed.\n",
+ world_mpi_rank, FUNC );
+ }
+ }
+
+ return(max_express_test);
+
+} /* do_express_test() */
+
+
+int main(int argc, char **argv)
+{
+ int ExpressMode = 0;
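+
+    /* Shrink the library's big-I/O element threshold so that the special
+     * handling for large MPI-IO transfers is exercised with modest buffers,
+     * then size the test data (bigcount) to exceed that threshold. */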
+ hsize_t newsize = 1048576;
+ hsize_t oldsize = H5S_mpio_set_bigio_count(newsize);
+
+ if (newsize != oldsize) {
+ bigcount = newsize * 2;
+ }
+
+ MPI_Init(&argc, &argv);
+ MPI_Comm_size(MPI_COMM_WORLD,&mpi_size);
+ MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
+
+ ExpressMode = do_express_test(mpi_rank);
+
+ dataset_big_write();
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ dataset_big_read();
+ MPI_Barrier(MPI_COMM_WORLD);
+
+ if (ExpressMode > 0) {
+ printf("***Express test mode on. Several tests are skipped\n");
+ }
+ else {
+ coll_chunk1();
+ MPI_Barrier(MPI_COMM_WORLD);
+ coll_chunk2();
+ MPI_Barrier(MPI_COMM_WORLD);
+ coll_chunk3();
+ }
+
+ /* close HDF5 library */
+ H5close();
+
+ MPI_Finalize();
+
+ return 0;
+}
+