summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/H5FDmpio.c37
-rw-r--r--src/H5Smpio.c180
-rw-r--r--src/H5Sprivate.h1
-rw-r--r--src/H5mpi.c167
-rw-r--r--testpar/t_bigio.c99
5 files changed, 297 insertions, 187 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 71e9fe1..11f0411 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -22,15 +22,15 @@
#include "H5FDdrvr_module.h" /* This source code file is part of the H5FD driver module */
-#include "H5private.h" /* Generic Functions */
+#include "H5private.h" /* Generic Functions */
#include "H5CXprivate.h" /* API Contexts */
-#include "H5Dprivate.h" /* Dataset functions */
-#include "H5Eprivate.h" /* Error handling */
-#include "H5Fprivate.h" /* File access */
-#include "H5FDprivate.h" /* File drivers */
-#include "H5FDmpi.h" /* MPI-based file drivers */
-#include "H5Iprivate.h" /* IDs */
-#include "H5MMprivate.h" /* Memory management */
+#include "H5Dprivate.h" /* Dataset functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5Fprivate.h" /* File access */
+#include "H5FDprivate.h" /* File drivers */
+#include "H5FDmpi.h" /* MPI-based file drivers */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /* Memory management */
#include "H5Pprivate.h" /* Property lists */
#ifdef H5_HAVE_PARALLEL
@@ -1324,6 +1324,7 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD__mpio_read() */
+
/*-------------------------------------------------------------------------
* Function: H5FD__mpio_write
@@ -1366,6 +1367,7 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
#endif
int size_i;
hbool_t use_view_this_time = FALSE;
+ hbool_t derived_type = FALSE;
H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */
herr_t ret_value = SUCCEED;
@@ -1391,8 +1393,6 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
size_i = (int)size;
- if((hsize_t)size_i != size)
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'w'])
@@ -1430,6 +1430,20 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
*/
mpi_off = 0;
} /* end if */
+ else if(size != (hsize_t)size_i) {
+ /* If HERE, then we need to work around the integer size limit
+ * of 2GB. The input size_t size variable cannot fit into an integer,
+ * but we can get around that limitation by creating a different datatype
+ * and then setting the integer size (or element count) to 1 when using
+ * the derived_type.
+ */
+
+ if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype")
+
+ derived_type = TRUE;
+ size_i = 1;
+ }
/* Write the data. */
if(use_view_this_time) {
@@ -1506,6 +1520,9 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
file->local_eof = addr + (haddr_t)bytes_written;
done:
+ if(derived_type) {
+ MPI_Type_free(&buf_type);
+ }
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", FUNC, file->mpi_rank, ret_value );
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index aeec566..9112d24 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -42,11 +42,10 @@
/* Local Macros */
/****************/
#define H5S_MPIO_INITIAL_ALLOC_COUNT 256
-#define TWO_GIG_LIMIT 2147483648
-#ifndef H5S_MAX_MPI_COUNT
-#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */
-#endif
+/*******************/
+/* Local Variables */
+/*******************/
/******************/
/* Local Typedefs */
@@ -88,8 +87,6 @@ static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list);
static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
size_t elmt_size, const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen);
-static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
- MPI_Datatype old_type, MPI_Datatype *new_type);
/*****************************/
@@ -102,40 +99,9 @@ static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_
/*********************/
-/*******************/
-/* Local Variables */
-/*******************/
-static hsize_t bigio_count = H5S_MAX_MPI_COUNT;
-
/* Declare a free list to manage the H5S_mpio_mpitype_node_t struct */
H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t);
-
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_set_bigio_count
- *
- * Purpose: Allow us to programatically change the switch point
- * when we utilize derived datatypes. This is of
- * particular interest for allowing nightly testing
- *
- * Return: The current/previous value of bigio_count.
- *
- * Programmer: Richard Warren, March 10, 2017
- *
- *-------------------------------------------------------------------------
- */
-hsize_t
-H5S_mpio_set_bigio_count(hsize_t new_count)
-{
- hsize_t orig_count = bigio_count;
-
- if((new_count > 0) && (new_count < TWO_GIG_LIMIT))
- bigio_count = new_count;
-
- return orig_count;
-} /* end H5S_mpio_set_bigio_count() */
-
/*-------------------------------------------------------------------------
* Function: H5S__mpio_all_type
@@ -160,6 +126,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
hsize_t total_bytes;
hssize_t snelmts; /* Total number of elmts (signed) */
hsize_t nelmts; /* Total number of elmts */
+ hsize_t bigio_count; /* Transition point to create derived type */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_STATIC
@@ -173,6 +140,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);
total_bytes = (hsize_t)elmt_size * nelmts;
+ bigio_count = H5_mpio_get_bigio_count();
/* Verify that the size can be expressed as a 32 bit integer */
if(bigio_count >= total_bytes) {
@@ -183,7 +151,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
} /* end if */
else {
/* Create a LARGE derived datatype for this transfer */
- if(H5S__mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
+ if(H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large datatype from the all selection")
*count = 1;
*is_derived_type = TRUE;
@@ -250,6 +218,7 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
int *blocks = NULL; /* Array of block sizes for MPI hindexed create call */
hsize_t u; /* Local index variable */
#endif
+ hsize_t bigio_count; /* Transition point to create derived type */
int mpi_code; /* MPI error code */
herr_t ret_value = SUCCEED; /* Return value */
@@ -260,6 +229,8 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
elmt_type_created = TRUE;
+ bigio_count = H5_mpio_get_bigio_count();
+
/* Check whether standard or BIGIO processing will be employeed */
if(bigio_count >= num_points) {
#if MPI_VERSION >= 3
@@ -518,7 +489,7 @@ done:
* selection and so the memory datatype has to be permuted using the
* permutation map created by the file selection.
*
- * Note: This routine is called from H5S_mpio_space_type(), which is
+ * Note: This routine is called from H5_mpio_space_type(), which is
* called first for the file dataspace and creates
*
* Return: Non-negative on success, negative on failure.
@@ -678,6 +649,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
hsize_t count;
} d[H5S_MAX_RANK];
+ hsize_t bigio_count; /* Transition point to create derived type */
hsize_t offset[H5S_MAX_RANK];
hsize_t max_xtent[H5S_MAX_RANK];
H5S_hyper_dim_t *diminfo; /* [rank] */
@@ -696,6 +668,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
HDassert(space);
HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size));
+ bigio_count = H5_mpio_get_bigio_count();
/* Initialize selection iterator */
if(H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
@@ -824,7 +797,7 @@ if(H5DEBUG(S)) {
} /* end if */
else
/* Create the compound datatype for this operation (> 2GB) */
- if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
+ if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large inner datatype in hyper selection")
/*******************************************************
@@ -878,7 +851,7 @@ if(H5DEBUG(S))
* Again we need to check that the number of BLOCKS can fit into
* a 32 bit integer */
if(bigio_count < d[i].block) {
- if(H5S__mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
+ if(H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large block datatype in hyper selection")
} /* end if */
else
@@ -899,7 +872,7 @@ if(H5DEBUG(S))
* we call the large type creation function to handle that
*/
if(bigio_count < d[i].count) {
- if(H5S__mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
+ if(H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large outer datatype in hyper selection")
} /* end if */
/* otherwise a regular create_hvector will do */
@@ -1001,6 +974,7 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
MPI_Datatype elmt_type; /* MPI datatype for an element */
hbool_t elmt_type_is_derived = FALSE; /* Whether the element type has been created */
MPI_Datatype span_type; /* MPI datatype for overall span tree */
+ hsize_t bigio_count; /* Transition point to create derived type */
hsize_t down[H5S_MAX_RANK]; /* 'down' sizes for each dimension */
uint64_t op_gen; /* Operation generation value */
int mpi_code; /* MPI return code */
@@ -1014,13 +988,14 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
HDassert(space->select.sel_info.hslab->span_lst);
HDassert(space->select.sel_info.hslab->span_lst->head);
+ bigio_count = H5_mpio_get_bigio_count();
/* Create the base type for an element */
if(bigio_count >= elmt_size) {
if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
} /* end if */
else
- if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
+ if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
elmt_type_is_derived = TRUE;
@@ -1124,8 +1099,10 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen)
{
H5S_hyper_span_t *span; /* Hyperslab span to iterate with */
+ hsize_t bigio_count; /* Transition point to create derived type */
+
size_t alloc_count = 0; /* Number of span tree nodes allocated at this level */
- size_t outercount; /* Number of span tree nodes at this level */
+ size_t outercount = 0; /* Number of span tree nodes at this level */
MPI_Datatype *inner_type = NULL;
hbool_t inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */
int *blocklen = NULL;
@@ -1140,6 +1117,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
HDassert(spans);
HDassert(type_list);
+ bigio_count = H5_mpio_get_bigio_count();
/* Check if we've visited this span tree before */
if(spans->op_gen != op_gen) {
H5S_mpio_mpitype_node_t *type_node; /* Pointer to new node in MPI data type list */
@@ -1185,7 +1163,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
blocklen[outercount] = (int)nelmts;
- if(bigio_count < blocklen[outercount])
+ if(bigio_count < (hsize_t)blocklen[outercount])
large_block = TRUE; /* at least one block type is large, so set this flag to true */
span = span->next;
@@ -1202,8 +1180,8 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
MPI_Datatype temp_type = MPI_DATATYPE_NULL;
/* create the block type from elmt_type while checking the 32 bit int limit */
- if(blocklen[u] > bigio_count) {
- if(H5S__mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
+ if((hsize_t)(blocklen[u]) > bigio_count) {
+ if(H5_mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
} /* end if */
else
@@ -1453,113 +1431,5 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S_mpio_space_type() */
-
-/*-------------------------------------------------------------------------
- * Function: H5S__mpio_create_large_type
- *
- * Purpose: Create a large datatype of size larger than what a 32 bit integer
- * can hold.
- *
- * Return: Non-negative on success, negative on failure.
- *
- * *new_type the new datatype created
- *
- * Programmer: Mohamad Chaarawi
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
- MPI_Datatype old_type, MPI_Datatype *new_type)
-{
- int num_big_types; /* num times the 2G datatype will be repeated */
- int remaining_bytes; /* the number of bytes left that can be held in an int value */
- hsize_t leftover;
- int block_len[2];
- int mpi_code; /* MPI return code */
- MPI_Datatype inner_type, outer_type, leftover_type, type[2];
- MPI_Aint disp[2], old_extent;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Calculate how many Big MPI datatypes are needed to represent the buffer */
- num_big_types = (int)(num_elements/bigio_count);
- leftover = num_elements - num_big_types * (hsize_t)bigio_count;
- H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
-
- /* Create a contiguous datatype of size equal to the largest
- * number that a 32 bit integer can hold x size of old type.
- * If the displacement is 0, then the type is contiguous, otherwise
- * use type_hvector to create the type with the displacement provided
- */
- if (0 == stride_bytes) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, old_type, &inner_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(bigio_count, 1, stride_bytes, old_type, &inner_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
- * If a stride is present, use hvector type
- */
- if(0 == stride_bytes) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- MPI_Type_free(&inner_type);
-
- /* If there is a remaining part create a contiguous/vector datatype and then
- * use a struct datatype to encapsulate everything.
- */
- if(remaining_bytes) {
- if(stride_bytes == 0) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
- * so we're using the MPI-2 version even though we don't need the lb
- * value.
- */
- {
- MPI_Aint unused_lb_arg;
- MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent);
- }
-
- /* Set up the arguments for MPI_Type_struct constructor */
- type[0] = outer_type;
- type[1] = leftover_type;
- block_len[0] = 1;
- block_len[1] = 1;
- disp[0] = 0;
- disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count;
-
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
-
- MPI_Type_free(&outer_type);
- MPI_Type_free(&leftover_type);
- } /* end if */
- else
- /* There are no remaining bytes so just set the new type to
- * the outer type created */
- *new_type = outer_type;
-
- if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5S__mpio_create_large_type() */
-
#endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h
index 0a9d2e7..3d68de0 100644
--- a/src/H5Sprivate.h
+++ b/src/H5Sprivate.h
@@ -307,7 +307,6 @@ H5_DLL herr_t H5S_select_iter_release(H5S_sel_iter_t *sel_iter);
H5_DLL herr_t H5S_sel_iter_close(H5S_sel_iter_t *sel_iter);
#ifdef H5_HAVE_PARALLEL
-H5_DLL hsize_t H5S_mpio_set_bigio_count(hsize_t new_count);
H5_DLL herr_t H5S_mpio_space_type(const H5S_t *space, size_t elmt_size,
/* out: */ MPI_Datatype *new_type,
int *count,
diff --git a/src/H5mpi.c b/src/H5mpi.c
index d48790b..f01e16a 100644
--- a/src/H5mpi.c
+++ b/src/H5mpi.c
@@ -22,6 +22,64 @@
#ifdef H5_HAVE_PARALLEL
+
+/****************/
+/* Local Macros */
+/****************/
+#define TWO_GIG_LIMIT (1 << 31)
+#ifndef H5_MAX_MPI_COUNT
+#define H5_MAX_MPI_COUNT (1 << 30)
+#endif
+
+/*******************/
+/* Local Variables */
+/*******************/
+static hsize_t bigio_count = H5_MAX_MPI_COUNT;
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_set_bigio_count
+ *
+ * Purpose: Allow us to programatically change the switch point
+ * when we utilize derived datatypes. This is of
+ * particular interest for allowing nightly testing
+ *
+ * Return: The current/previous value of bigio_count.
+ *
+ * Programmer: Richard Warren, March 10, 2017
+ *
+ *-------------------------------------------------------------------------
+ */
+hsize_t
+H5_mpio_set_bigio_count(hsize_t new_count)
+{
+ hsize_t orig_count = bigio_count;
+
+ if((new_count > 0) && (new_count < (hsize_t)TWO_GIG_LIMIT)) {
+ bigio_count = new_count;
+ }
+ return orig_count;
+} /* end H5_mpio_set_bigio_count() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_get_bigio_count
+ *
+ * Purpose: Allow other HDF5 library functions to access
+ * the current value for bigio_count.
+ *
+ * Return: The current/previous value of bigio_count.
+ *
+ * Programmer: Richard Warren, October 7, 2019
+ *
+ *-------------------------------------------------------------------------
+ */
+hsize_t
+H5_mpio_get_bigio_count()
+{
+ return bigio_count;
+}
+
/*-------------------------------------------------------------------------
* Function: H5_mpi_comm_dup
@@ -392,5 +450,114 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5_mpi_info_cmp() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_create_large_type
+ *
+ * Purpose: Create a large datatype of size larger than what a 32 bit integer
+ * can hold.
+ *
+ * Return: Non-negative on success, negative on failure.
+ *
+ * *new_type the new datatype created
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
+ MPI_Datatype old_type, MPI_Datatype *new_type)
+{
+ int num_big_types; /* num times the 2G datatype will be repeated */
+ int remaining_bytes; /* the number of bytes left that can be held in an int value */
+ hsize_t leftover;
+ int block_len[2];
+ int mpi_code; /* MPI return code */
+ MPI_Datatype inner_type, outer_type, leftover_type, type[2];
+ MPI_Aint disp[2], old_extent;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Calculate how many Big MPI datatypes are needed to represent the buffer */
+ num_big_types = (int)(num_elements/bigio_count);
+ leftover = num_elements - num_big_types * (hsize_t)bigio_count;
+ H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
+
+ /* Create a contiguous datatype of size equal to the largest
+ * number that a 32 bit integer can hold x size of old type.
+ * If the displacement is 0, then the type is contiguous, otherwise
+ * use type_hvector to create the type with the displacement provided
+ */
+ if (0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, old_type, &inner_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(bigio_count, 1, stride_bytes, old_type, &inner_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
+ * If a stride is present, use hvector type
+ */
+ if(0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ MPI_Type_free(&inner_type);
+
+ /* If there is a remaining part create a contiguous/vector datatype and then
+ * use a struct datatype to encapsulate everything.
+ */
+ if(remaining_bytes) {
+ if(stride_bytes == 0) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
+ * so we're using the MPI-2 version even though we don't need the lb
+ * value.
+ */
+ {
+ MPI_Aint unused_lb_arg;
+ MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent);
+ }
+
+ /* Set up the arguments for MPI_Type_struct constructor */
+ type[0] = outer_type;
+ type[1] = leftover_type;
+ block_len[0] = 1;
+ block_len[1] = 1;
+ disp[0] = 0;
+ disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count;
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+
+ MPI_Type_free(&outer_type);
+ MPI_Type_free(&leftover_type);
+ } /* end if */
+ else
+ /* There are no remaining bytes so just set the new type to
+ * the outer type created */
+ *new_type = outer_type;
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5_mpio_create_large_type() */
+
+
#endif /* H5_HAVE_PARALLEL */
diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c
index 9ca077c..fe96c83 100644
--- a/testpar/t_bigio.c
+++ b/testpar/t_bigio.c
@@ -4,7 +4,8 @@
#include "H5Dprivate.h" /* For Chunk tests */
/* FILENAME and filenames must have the same number of names */
-const char *FILENAME[2]={ "bigio_test.h5",
+const char *FILENAME[3]={ "bigio_test.h5",
+ "single_rank_independent_io.h5",
NULL
};
@@ -29,7 +30,8 @@ const char *FILENAME[2]={ "bigio_test.h5",
#define DATASET5 "DSET5"
#define DXFER_COLLECTIVE_IO 0x1 /* Collective IO*/
#define DXFER_INDEPENDENT_IO 0x2 /* Independent IO collectively */
-#define DXFER_BIGCOUNT 536870916
+#define DXFER_BIGCOUNT (1 < 29)
+#define LARGE_DIM 1610612736
#define HYPER 1
#define POINT 2
@@ -40,7 +42,7 @@ typedef hsize_t B_DATATYPE;
int facc_type = FACC_MPIO; /*Test file access type */
int dxfer_coll_type = DXFER_COLLECTIVE_IO;
-size_t bigcount = DXFER_BIGCOUNT;
+size_t bigcount = (size_t)DXFER_BIGCOUNT;
int nerrors = 0;
int mpi_size, mpi_rank;
@@ -51,6 +53,8 @@ static void coll_chunktest(const char* filename, int chunk_factor, int select_fa
int api_option, int file_selection, int mem_selection, int mode);
hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);
+hsize_t H5_mpio_set_bigio_count(hsize_t new_count);
+
/*
* Setup the coordinates for point selection.
*/
@@ -478,22 +482,19 @@ static void
dataset_big_write(void)
{
- hid_t xfer_plist; /* Dataset transfer properties list */
- hid_t sid; /* Dataspace ID */
- hid_t file_dataspace; /* File dataspace ID */
- hid_t mem_dataspace; /* memory dataspace ID */
+ hid_t xfer_plist; /* Dataset transfer properties list */
+ hid_t sid; /* Dataspace ID */
+ hid_t file_dataspace; /* File dataspace ID */
+ hid_t mem_dataspace; /* memory dataspace ID */
hid_t dataset;
- hid_t datatype; /* Datatype ID */
- hsize_t dims[RANK]; /* dataset dim sizes */
- hsize_t start[RANK]; /* for hyperslab setting */
- hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
- hsize_t block[RANK]; /* for hyperslab setting */
+ hsize_t dims[RANK]; /* dataset dim sizes */
+ hsize_t start[RANK]; /* for hyperslab setting */
+ hsize_t count[RANK],stride[RANK]; /* for hyperslab setting */
+ hsize_t block[RANK]; /* for hyperslab setting */
hsize_t *coords = NULL;
- int i;
- herr_t ret; /* Generic return value */
- hid_t fid; /* HDF5 file ID */
- hid_t acc_tpl; /* File access templates */
- hsize_t h;
+ herr_t ret; /* Generic return value */
+ hid_t fid; /* HDF5 file ID */
+ hid_t acc_tpl; /* File access templates */
size_t num_points;
B_DATATYPE * wdata;
@@ -806,8 +807,6 @@ dataset_big_read(void)
hsize_t start[RANK]; /* for hyperslab setting */
hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
hsize_t block[RANK]; /* for hyperslab setting */
- int i,j,k;
- hsize_t h;
size_t num_points;
hsize_t *coords = NULL;
herr_t ret; /* Generic return value */
@@ -1120,6 +1119,63 @@ dataset_big_read(void)
} /* dataset_large_readAll */
+static void
+single_rank_independent_io(void)
+{
+ if (mpi_rank == 0)
+ HDprintf("single_rank_independent_io\n");
+
+ if (MAINPROCESS) {
+ hsize_t dims[] = { LARGE_DIM };
+ hid_t file_id = -1;
+ hid_t fapl_id = -1;
+ hid_t dset_id = -1;
+ hid_t fspace_id = -1;
+ hid_t mspace_id = -1;
+ void *data = NULL;
+
+ fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+ VRFY((fapl_id >= 0), "H5P_FILE_ACCESS");
+
+ H5Pset_fapl_mpio(fapl_id, MPI_COMM_SELF, MPI_INFO_NULL);
+ file_id = H5Fcreate(FILENAME[1], H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+ VRFY((file_id >= 0), "H5Dcreate2 succeeded");
+
+ fspace_id = H5Screate_simple(1, dims, NULL);
+ VRFY((fspace_id >= 0), "H5Screate_simple fspace_id succeeded");
+
+ /*
+ * Create and write to a >2GB dataset from a single rank.
+ */
+ dset_id = H5Dcreate2(file_id, "test_dset", H5T_NATIVE_INT, fspace_id,
+ H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+ VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+ data = malloc(LARGE_DIM * sizeof(int));
+
+ if (mpi_rank == 0)
+ H5Sselect_all(fspace_id);
+ else
+ H5Sselect_none(fspace_id);
+
+ dims[0] = LARGE_DIM;
+ mspace_id = H5Screate_simple(1, dims, NULL);
+ VRFY((mspace_id >= 0), "H5Screate_simple mspace_id succeeded");
+ H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, H5P_DEFAULT, data);
+
+ free(data);
+ H5Sclose(mspace_id);
+ H5Sclose(fspace_id);
+ H5Pclose(fapl_id);
+ H5Dclose(dset_id);
+ H5Fclose(file_id);
+
+ HDremove(FILENAME[1]);
+
+ }
+ MPI_Barrier(MPI_COMM_WORLD);
+}
/*
* Create the appropriate File access property list
@@ -1395,7 +1451,6 @@ coll_chunktest(const char* filename,
size_t num_points; /* for point selection */
hsize_t *coords = NULL; /* for point selection */
- int i;
/* Create the data space */
@@ -1873,7 +1928,7 @@ int main(int argc, char **argv)
int ExpressMode = 0;
hsize_t newsize = 1048576;
/* Set the bigio processing limit to be 'newsize' bytes */
- hsize_t oldsize = H5S_mpio_set_bigio_count(newsize);
+ hsize_t oldsize = H5_mpio_set_bigio_count(newsize);
/* Having set the bigio handling to a size that is managable,
* we'll set our 'bigcount' variable to be 2X that limit so
@@ -1918,6 +1973,8 @@ int main(int argc, char **argv)
coll_chunk2();
MPI_Barrier(MPI_COMM_WORLD);
coll_chunk3();
+ MPI_Barrier(MPI_COMM_WORLD);
+ single_rank_independent_io();
}
/* turn off alarm */