author    Richard Warren <Richard.Warren@hdfgroup.org>    2019-10-08 19:57:43 (GMT)
committer Richard Warren <Richard.Warren@hdfgroup.org>    2019-10-08 19:57:43 (GMT)
commit    e9f253c7567cf508a3d40b863dce6455f0c93fdb (patch)
tree      58fd8d9d1b32c5d74fc21073803efb2c73602b23
parent    f32e70895ef278a48c498477b9c29f131819e2f4 (diff)
parent    abb43d3d3ac2e802d0efbe21e028381ed99722f9 (diff)
Merge pull request #1987 in HDFFV/hdf5 from HDFFV-10539-2GB-Write-Independent to develop
* commit 'abb43d3d3ac2e802d0efbe21e028381ed99722f9':
  Remove debugging logic from the new t_bigio test
  Expanded t_bigio.c to include Jordan's test from HDFFV-10539
  Another cleanup pass as suggested by the reviewers.
  Fix some typos and remove an unused prototype from H5Sprivate.h
  At the suggestion of the PR reviewers, moved mpio_create_large_type to H5mpi.c and renamed the function appropriately. Also moved the support functions that set and get the value at which we transition to using derived datatypes.
  Made code review edits suggested by Jerome, plus various code updates to files that I touched to eliminate compile warnings (on my Linux box).
  No functional changes, just removed some tab characters
  Make the initial bug fixes to allow >2GB writes with Independent IO
-rw-r--r--   src/H5FDmpio.c      37
-rw-r--r--   src/H5Smpio.c      180
-rw-r--r--   src/H5Sprivate.h     1
-rw-r--r--   src/H5mpi.c        167
-rw-r--r--   src/H5private.h      3
-rw-r--r--   testpar/t_bigio.c   99
-rw-r--r--   testpar/t_file.c    13
7 files changed, 306 insertions, 194 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 71e9fe1..11f0411 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -22,15 +22,15 @@
#include "H5FDdrvr_module.h" /* This source code file is part of the H5FD driver module */
-#include "H5private.h" /* Generic Functions */
+#include "H5private.h" /* Generic Functions */
#include "H5CXprivate.h" /* API Contexts */
-#include "H5Dprivate.h" /* Dataset functions */
-#include "H5Eprivate.h" /* Error handling */
-#include "H5Fprivate.h" /* File access */
-#include "H5FDprivate.h" /* File drivers */
-#include "H5FDmpi.h" /* MPI-based file drivers */
-#include "H5Iprivate.h" /* IDs */
-#include "H5MMprivate.h" /* Memory management */
+#include "H5Dprivate.h" /* Dataset functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5Fprivate.h" /* File access */
+#include "H5FDprivate.h" /* File drivers */
+#include "H5FDmpi.h" /* MPI-based file drivers */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /* Memory management */
#include "H5Pprivate.h" /* Property lists */
#ifdef H5_HAVE_PARALLEL
@@ -1324,6 +1324,7 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD__mpio_read() */
+
/*-------------------------------------------------------------------------
* Function: H5FD__mpio_write
@@ -1366,6 +1367,7 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
#endif
int size_i;
hbool_t use_view_this_time = FALSE;
+ hbool_t derived_type = FALSE;
H5FD_mpio_xfer_t xfer_mode; /* I/O transfer mode */
herr_t ret_value = SUCCEED;
@@ -1391,8 +1393,6 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
size_i = (int)size;
- if((hsize_t)size_i != size)
- HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'w'])
@@ -1430,6 +1430,20 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
*/
mpi_off = 0;
} /* end if */
+ else if(size != (hsize_t)size_i) {
+ /* If we reach this branch, the requested transfer exceeds the 2GB limit
+ * of a 32-bit integer: the input size_t size cannot fit into size_i.
+ * Work around the limitation by describing the entire buffer with a
+ * derived MPI datatype and then setting the element count (size_i) to 1
+ * when the derived_type is used.
+ */
+
+ if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0)
+ HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype")
+
+ derived_type = TRUE;
+ size_i = 1;
+ }
/* Write the data. */
if(use_view_this_time) {
@@ -1506,6 +1520,9 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
file->local_eof = addr + (haddr_t)bytes_written;
done:
+ if(derived_type) {
+ MPI_Type_free(&buf_type);
+ }
#ifdef H5FDmpio_DEBUG
if(H5FD_mpio_Debug[(int)'t'])
HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", FUNC, file->mpi_rank, ret_value );
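The hunk above is the core of the fix: once the requested transfer no longer fits in the int element count that MPI takes, H5FD__mpio_write() describes the whole buffer with a derived datatype and passes a count of 1. Below is a minimal standalone sketch of that pattern in plain MPI-IO; write_big and CHUNK are illustrative names, not part of the patch, and the remainder handling that H5_mpio_create_large_type() does with MPI_Type_create_struct() is avoided here by assuming nbytes is a multiple of CHUNK above the 2GB limit.

    #include <mpi.h>
    #include <limits.h>
    #include <stddef.h>

    /* Write 'nbytes' bytes from 'buf' at offset 'off', even when nbytes
     * exceeds INT_MAX (sketch only; assumes nbytes % CHUNK == 0 above 2GB). */
    static int write_big(MPI_File fh, MPI_Offset off, const void *buf, size_t nbytes)
    {
        if (nbytes <= (size_t)INT_MAX)
            return MPI_File_write_at(fh, off, buf, (int)nbytes, MPI_BYTE,
                                     MPI_STATUS_IGNORE);

        const int CHUNK = 1 << 30;                 /* 1 GiB inner block */
        MPI_Datatype chunk_t, big_t;
        int mpi_code;

        /* Inner type: CHUNK contiguous bytes */
        if ((mpi_code = MPI_Type_contiguous(CHUNK, MPI_BYTE, &chunk_t)) != MPI_SUCCESS)
            return mpi_code;

        /* Outer type: nbytes/CHUNK repetitions of the inner type */
        if ((mpi_code = MPI_Type_contiguous((int)(nbytes / (size_t)CHUNK),
                                            chunk_t, &big_t)) != MPI_SUCCESS)
            return mpi_code;
        MPI_Type_commit(&big_t);
        MPI_Type_free(&chunk_t);

        /* A count of 1: the derived type already covers the whole buffer */
        mpi_code = MPI_File_write_at(fh, off, buf, 1, big_t, MPI_STATUS_IGNORE);
        MPI_Type_free(&big_t);
        return mpi_code;
    }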
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index aeec566..9112d24 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -42,11 +42,10 @@
/* Local Macros */
/****************/
#define H5S_MPIO_INITIAL_ALLOC_COUNT 256
-#define TWO_GIG_LIMIT 2147483648
-#ifndef H5S_MAX_MPI_COUNT
-#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */
-#endif
+/*******************/
+/* Local Variables */
+/*******************/
/******************/
/* Local Typedefs */
@@ -88,8 +87,6 @@ static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list);
static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
size_t elmt_size, const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen);
-static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
- MPI_Datatype old_type, MPI_Datatype *new_type);
/*****************************/
@@ -102,40 +99,9 @@ static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_
/*********************/
-/*******************/
-/* Local Variables */
-/*******************/
-static hsize_t bigio_count = H5S_MAX_MPI_COUNT;
-
/* Declare a free list to manage the H5S_mpio_mpitype_node_t struct */
H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t);
-
-
-/*-------------------------------------------------------------------------
- * Function: H5S_mpio_set_bigio_count
- *
- * Purpose: Allow us to programatically change the switch point
- * when we utilize derived datatypes. This is of
- * particular interest for allowing nightly testing
- *
- * Return: The current/previous value of bigio_count.
- *
- * Programmer: Richard Warren, March 10, 2017
- *
- *-------------------------------------------------------------------------
- */
-hsize_t
-H5S_mpio_set_bigio_count(hsize_t new_count)
-{
- hsize_t orig_count = bigio_count;
-
- if((new_count > 0) && (new_count < TWO_GIG_LIMIT))
- bigio_count = new_count;
-
- return orig_count;
-} /* end H5S_mpio_set_bigio_count() */
-
/*-------------------------------------------------------------------------
* Function: H5S__mpio_all_type
@@ -160,6 +126,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
hsize_t total_bytes;
hssize_t snelmts; /* Total number of elmts (signed) */
hsize_t nelmts; /* Total number of elmts */
+ hsize_t bigio_count; /* Transition point to create derived type */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_STATIC
@@ -173,6 +140,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);
total_bytes = (hsize_t)elmt_size * nelmts;
+ bigio_count = H5_mpio_get_bigio_count();
/* Verify that the size can be expressed as a 32 bit integer */
if(bigio_count >= total_bytes) {
@@ -183,7 +151,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
} /* end if */
else {
/* Create a LARGE derived datatype for this transfer */
- if(H5S__mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
+ if(H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large datatype from the all selection")
*count = 1;
*is_derived_type = TRUE;
@@ -250,6 +218,7 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
int *blocks = NULL; /* Array of block sizes for MPI hindexed create call */
hsize_t u; /* Local index variable */
#endif
+ hsize_t bigio_count; /* Transition point to create derived type */
int mpi_code; /* MPI error code */
herr_t ret_value = SUCCEED; /* Return value */
@@ -260,6 +229,8 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
elmt_type_created = TRUE;
+ bigio_count = H5_mpio_get_bigio_count();
+
/* Check whether standard or BIGIO processing will be employed */
if(bigio_count >= num_points) {
#if MPI_VERSION >= 3
@@ -518,7 +489,7 @@ done:
* selection and so the memory datatype has to be permuted using the
* permutation map created by the file selection.
*
- * Note: This routine is called from H5S_mpio_space_type(), which is
+ * Note: This routine is called from H5_mpio_space_type(), which is
* called first for the file dataspace and creates
*
* Return: Non-negative on success, negative on failure.
@@ -678,6 +649,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
hsize_t count;
} d[H5S_MAX_RANK];
+ hsize_t bigio_count; /* Transition point to create derived type */
hsize_t offset[H5S_MAX_RANK];
hsize_t max_xtent[H5S_MAX_RANK];
H5S_hyper_dim_t *diminfo; /* [rank] */
@@ -696,6 +668,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
HDassert(space);
HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size));
+ bigio_count = H5_mpio_get_bigio_count();
/* Initialize selection iterator */
if(H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
@@ -824,7 +797,7 @@ if(H5DEBUG(S)) {
} /* end if */
else
/* Create the compound datatype for this operation (> 2GB) */
- if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
+ if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large inner datatype in hyper selection")
/*******************************************************
@@ -878,7 +851,7 @@ if(H5DEBUG(S))
* Again we need to check that the number of BLOCKS can fit into
* a 32 bit integer */
if(bigio_count < d[i].block) {
- if(H5S__mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
+ if(H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large block datatype in hyper selection")
} /* end if */
else
@@ -899,7 +872,7 @@ if(H5DEBUG(S))
* we call the large type creation function to handle that
*/
if(bigio_count < d[i].count) {
- if(H5S__mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
+ if(H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large outer datatype in hyper selection")
} /* end if */
/* otherwise a regular create_hvector will do */
@@ -1001,6 +974,7 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
MPI_Datatype elmt_type; /* MPI datatype for an element */
hbool_t elmt_type_is_derived = FALSE; /* Whether the element type has been created */
MPI_Datatype span_type; /* MPI datatype for overall span tree */
+ hsize_t bigio_count; /* Transition point to create derived type */
hsize_t down[H5S_MAX_RANK]; /* 'down' sizes for each dimension */
uint64_t op_gen; /* Operation generation value */
int mpi_code; /* MPI return code */
@@ -1014,13 +988,14 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
HDassert(space->select.sel_info.hslab->span_lst);
HDassert(space->select.sel_info.hslab->span_lst->head);
+ bigio_count = H5_mpio_get_bigio_count();
/* Create the base type for an element */
if(bigio_count >= elmt_size) {
if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
} /* end if */
else
- if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
+ if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
elmt_type_is_derived = TRUE;
@@ -1124,8 +1099,10 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen)
{
H5S_hyper_span_t *span; /* Hyperslab span to iterate with */
+ hsize_t bigio_count; /* Transition point to create derived type */
+
size_t alloc_count = 0; /* Number of span tree nodes allocated at this level */
- size_t outercount; /* Number of span tree nodes at this level */
+ size_t outercount = 0; /* Number of span tree nodes at this level */
MPI_Datatype *inner_type = NULL;
hbool_t inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */
int *blocklen = NULL;
@@ -1140,6 +1117,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
HDassert(spans);
HDassert(type_list);
+ bigio_count = H5_mpio_get_bigio_count();
/* Check if we've visited this span tree before */
if(spans->op_gen != op_gen) {
H5S_mpio_mpitype_node_t *type_node; /* Pointer to new node in MPI data type list */
@@ -1185,7 +1163,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
blocklen[outercount] = (int)nelmts;
- if(bigio_count < blocklen[outercount])
+ if(bigio_count < (hsize_t)blocklen[outercount])
large_block = TRUE; /* at least one block type is large, so set this flag to true */
span = span->next;
@@ -1202,8 +1180,8 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
MPI_Datatype temp_type = MPI_DATATYPE_NULL;
/* create the block type from elmt_type while checking the 32 bit int limit */
- if(blocklen[u] > bigio_count) {
- if(H5S__mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
+ if((hsize_t)(blocklen[u]) > bigio_count) {
+ if(H5_mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
} /* end if */
else
@@ -1453,113 +1431,5 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5S_mpio_space_type() */
-
-/*-------------------------------------------------------------------------
- * Function: H5S__mpio_create_large_type
- *
- * Purpose: Create a large datatype of size larger than what a 32 bit integer
- * can hold.
- *
- * Return: Non-negative on success, negative on failure.
- *
- * *new_type the new datatype created
- *
- * Programmer: Mohamad Chaarawi
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
- MPI_Datatype old_type, MPI_Datatype *new_type)
-{
- int num_big_types; /* num times the 2G datatype will be repeated */
- int remaining_bytes; /* the number of bytes left that can be held in an int value */
- hsize_t leftover;
- int block_len[2];
- int mpi_code; /* MPI return code */
- MPI_Datatype inner_type, outer_type, leftover_type, type[2];
- MPI_Aint disp[2], old_extent;
- herr_t ret_value = SUCCEED; /* Return value */
-
- FUNC_ENTER_STATIC
-
- /* Calculate how many Big MPI datatypes are needed to represent the buffer */
- num_big_types = (int)(num_elements/bigio_count);
- leftover = num_elements - num_big_types * (hsize_t)bigio_count;
- H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
-
- /* Create a contiguous datatype of size equal to the largest
- * number that a 32 bit integer can hold x size of old type.
- * If the displacement is 0, then the type is contiguous, otherwise
- * use type_hvector to create the type with the displacement provided
- */
- if (0 == stride_bytes) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, old_type, &inner_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(bigio_count, 1, stride_bytes, old_type, &inner_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
- * If a stride is present, use hvector type
- */
- if(0 == stride_bytes) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- MPI_Type_free(&inner_type);
-
- /* If there is a remaining part create a contiguous/vector datatype and then
- * use a struct datatype to encapsulate everything.
- */
- if(remaining_bytes) {
- if(stride_bytes == 0) {
- if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
- } /* end if */
- else
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
- /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
- * so we're using the MPI-2 version even though we don't need the lb
- * value.
- */
- {
- MPI_Aint unused_lb_arg;
- MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent);
- }
-
- /* Set up the arguments for MPI_Type_struct constructor */
- type[0] = outer_type;
- type[1] = leftover_type;
- block_len[0] = 1;
- block_len[1] = 1;
- disp[0] = 0;
- disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count;
-
- if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
-
- MPI_Type_free(&outer_type);
- MPI_Type_free(&leftover_type);
- } /* end if */
- else
- /* There are no remaining bytes so just set the new type to
- * the outer type created */
- *new_type = outer_type;
-
- if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-
-done:
- FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5S__mpio_create_large_type() */
-
#endif /* H5_HAVE_PARALLEL */
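The H5Smpio.c changes are largely mechanical: every type-building path now reads the switch point via H5_mpio_get_bigio_count() and calls the relocated H5_mpio_create_large_type() whenever a block length or count exceeds it. For reference, the nested construction used per hyperslab dimension follows roughly the sketch below; build_dim_type is an illustrative name and the large-type branch taken when block or count is above bigio_count is omitted.

    #include <mpi.h>

    /* One dimension of a regular hyperslab: 'block' contiguous copies of the
     * inner type, replicated 'count' times with a byte stride (sketch only;
     * both values are assumed to fit in an int here). */
    static int build_dim_type(MPI_Datatype inner_type, int block, int count,
                              MPI_Aint stride_bytes, MPI_Datatype *outer_type)
    {
        MPI_Datatype block_type;
        int mpi_code;

        if ((mpi_code = MPI_Type_contiguous(block, inner_type, &block_type)) != MPI_SUCCESS)
            return mpi_code;

        if ((mpi_code = MPI_Type_create_hvector(count, 1, stride_bytes,
                                                block_type, outer_type)) != MPI_SUCCESS)
            return mpi_code;

        MPI_Type_free(&block_type);
        return MPI_Type_commit(outer_type);
    }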
diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h
index 0a9d2e7..3d68de0 100644
--- a/src/H5Sprivate.h
+++ b/src/H5Sprivate.h
@@ -307,7 +307,6 @@ H5_DLL herr_t H5S_select_iter_release(H5S_sel_iter_t *sel_iter);
H5_DLL herr_t H5S_sel_iter_close(H5S_sel_iter_t *sel_iter);
#ifdef H5_HAVE_PARALLEL
-H5_DLL hsize_t H5S_mpio_set_bigio_count(hsize_t new_count);
H5_DLL herr_t H5S_mpio_space_type(const H5S_t *space, size_t elmt_size,
/* out: */ MPI_Datatype *new_type,
int *count,
diff --git a/src/H5mpi.c b/src/H5mpi.c
index d48790b..f01e16a 100644
--- a/src/H5mpi.c
+++ b/src/H5mpi.c
@@ -22,6 +22,64 @@
#ifdef H5_HAVE_PARALLEL
+
+/****************/
+/* Local Macros */
+/****************/
+#define TWO_GIG_LIMIT ((hsize_t)1 << 31)
+#ifndef H5_MAX_MPI_COUNT
+#define H5_MAX_MPI_COUNT (1 << 30)
+#endif
+
+/*******************/
+/* Local Variables */
+/*******************/
+static hsize_t bigio_count = H5_MAX_MPI_COUNT;
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_set_bigio_count
+ *
+ * Purpose: Allow us to programmatically change the switch point
+ * when we utilize derived datatypes. This is of
+ * particular interest for allowing nightly testing.
+ *
+ * Return: The current/previous value of bigio_count.
+ *
+ * Programmer: Richard Warren, March 10, 2017
+ *
+ *-------------------------------------------------------------------------
+ */
+hsize_t
+H5_mpio_set_bigio_count(hsize_t new_count)
+{
+ hsize_t orig_count = bigio_count;
+
+ if((new_count > 0) && (new_count < (hsize_t)TWO_GIG_LIMIT)) {
+ bigio_count = new_count;
+ }
+ return orig_count;
+} /* end H5_mpio_set_bigio_count() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_get_bigio_count
+ *
+ * Purpose: Allow other HDF5 library functions to access
+ * the current value for bigio_count.
+ *
+ * Return: The current value of bigio_count.
+ *
+ * Programmer: Richard Warren, October 7, 2019
+ *
+ *-------------------------------------------------------------------------
+ */
+hsize_t
+H5_mpio_get_bigio_count(void)
+{
+ return bigio_count;
+}
+
/*-------------------------------------------------------------------------
* Function: H5_mpi_comm_dup
@@ -392,5 +450,114 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5_mpi_info_cmp() */
+
+/*-------------------------------------------------------------------------
+ * Function: H5_mpio_create_large_type
+ *
+ * Purpose: Create a large datatype of size larger than what a 32 bit integer
+ * can hold.
+ *
+ * Return: Non-negative on success, negative on failure.
+ *
+ * *new_type the new datatype created
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
+ MPI_Datatype old_type, MPI_Datatype *new_type)
+{
+ int num_big_types; /* num times the 2G datatype will be repeated */
+ int remaining_bytes; /* the number of bytes left that can be held in an int value */
+ hsize_t leftover;
+ int block_len[2];
+ int mpi_code; /* MPI return code */
+ MPI_Datatype inner_type, outer_type, leftover_type, type[2];
+ MPI_Aint disp[2], old_extent;
+ herr_t ret_value = SUCCEED; /* Return value */
+
+ FUNC_ENTER_NOAPI(FAIL)
+
+ /* Calculate how many Big MPI datatypes are needed to represent the buffer */
+ num_big_types = (int)(num_elements/bigio_count);
+ leftover = num_elements - num_big_types * (hsize_t)bigio_count;
+ H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
+
+ /* Create a contiguous datatype of size equal to the largest
+ * number that a 32 bit integer can hold x size of old type.
+ * If the displacement is 0, then the type is contiguous, otherwise
+ * use type_hvector to create the type with the displacement provided
+ */
+ if (0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, old_type, &inner_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(bigio_count, 1, stride_bytes, old_type, &inner_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
+ * If a stride is present, use hvector type
+ */
+ if(0 == stride_bytes) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ MPI_Type_free(&inner_type);
+
+ /* If there is a remaining part create a contiguous/vector datatype and then
+ * use a struct datatype to encapsulate everything.
+ */
+ if(remaining_bytes) {
+ if(stride_bytes == 0) {
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+ } /* end if */
+ else
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+ /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
+ * so we're using the MPI-2 version even though we don't need the lb
+ * value.
+ */
+ {
+ MPI_Aint unused_lb_arg;
+ MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent);
+ }
+
+ /* Set up the arguments for MPI_Type_struct constructor */
+ type[0] = outer_type;
+ type[1] = leftover_type;
+ block_len[0] = 1;
+ block_len[1] = 1;
+ disp[0] = 0;
+ disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count;
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+
+ MPI_Type_free(&outer_type);
+ MPI_Type_free(&leftover_type);
+ } /* end if */
+ else
+ /* There are no remaining bytes so just set the new type to
+ * the outer type created */
+ *new_type = outer_type;
+
+ if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5_mpio_create_large_type() */
+
+
#endif /* H5_HAVE_PARALLEL */
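As a worked example of the decomposition H5_mpio_create_large_type() performs, assume the default bigio_count of 2^30 = 1,073,741,824 and a request for 5,000,000,000 elements of MPI_BYTE with no stride: num_big_types = 5,000,000,000 / 1,073,741,824 = 4, and the leftover is 5,000,000,000 - 4 * 1,073,741,824 = 705,032,704 bytes. The committed result is a two-member MPI struct: member 0 is a contiguous type of 4 inner blocks of 2^30 bytes each at displacement 0, and member 1 is a contiguous type of the 705,032,704 remaining bytes at displacement 4 * 1,073,741,824 = 4,294,967,296.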
diff --git a/src/H5private.h b/src/H5private.h
index bb2a2eb..4ae9f12 100644
--- a/src/H5private.h
+++ b/src/H5private.h
@@ -2670,12 +2670,15 @@ H5_DLL herr_t H5_combine_path(const char *path1, const char *path2, char **ful
#ifdef H5_HAVE_PARALLEL
/* Generic MPI functions */
+H5_DLL hsize_t H5_mpio_get_bigio_count(void);
H5_DLL herr_t H5_mpi_comm_dup(MPI_Comm comm, MPI_Comm *comm_new);
H5_DLL herr_t H5_mpi_info_dup(MPI_Info info, MPI_Info *info_new);
H5_DLL herr_t H5_mpi_comm_free(MPI_Comm *comm);
H5_DLL herr_t H5_mpi_info_free(MPI_Info *info);
H5_DLL herr_t H5_mpi_comm_cmp(MPI_Comm comm1, MPI_Comm comm2, int *result);
H5_DLL herr_t H5_mpi_info_cmp(MPI_Info info1, MPI_Info info2, int *result);
+H5_DLL herr_t H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
+ MPI_Datatype old_type, MPI_Datatype *new_type);
#endif /* H5_HAVE_PARALLEL */
/* Functions for debugging */
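With these two prototypes exported through H5private.h, any parallel code path in the library can adopt the same pattern as H5Smpio.c. A short sketch in HDF5's internal idiom; describe_transfer and its parameters are hypothetical, and only the two H5_mpio_* calls come from this patch.

    #ifdef H5_HAVE_PARALLEL
    static herr_t
    describe_transfer(hsize_t total_bytes, MPI_Datatype *new_type, int *count,
                      hbool_t *is_derived)
    {
        hsize_t bigio_count = H5_mpio_get_bigio_count();

        if (total_bytes <= bigio_count) {
            /* Small enough for a plain MPI_BYTE count */
            *new_type   = MPI_BYTE;
            *count      = (int)total_bytes;
            *is_derived = FALSE;
        } /* end if */
        else {
            /* Describe the whole transfer with one derived type, count 1 */
            if (H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
                return FAIL;
            *count      = 1;
            *is_derived = TRUE;
        } /* end else */

        return SUCCEED;
    }
    #endif /* H5_HAVE_PARALLEL */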
diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c
index 9ca077c..fe96c83 100644
--- a/testpar/t_bigio.c
+++ b/testpar/t_bigio.c
@@ -4,7 +4,8 @@
#include "H5Dprivate.h" /* For Chunk tests */
/* FILENAME and filenames must have the same number of names */
-const char *FILENAME[2]={ "bigio_test.h5",
+const char *FILENAME[3]={ "bigio_test.h5",
+ "single_rank_independent_io.h5",
NULL
};
@@ -29,7 +30,8 @@ const char *FILENAME[2]={ "bigio_test.h5",
#define DATASET5 "DSET5"
#define DXFER_COLLECTIVE_IO 0x1 /* Collective IO*/
#define DXFER_INDEPENDENT_IO 0x2 /* Independent IO collectively */
-#define DXFER_BIGCOUNT 536870916
+#define DXFER_BIGCOUNT (1 << 29)
+#define LARGE_DIM 1610612736
#define HYPER 1
#define POINT 2
@@ -40,7 +42,7 @@ typedef hsize_t B_DATATYPE;
int facc_type = FACC_MPIO; /*Test file access type */
int dxfer_coll_type = DXFER_COLLECTIVE_IO;
-size_t bigcount = DXFER_BIGCOUNT;
+size_t bigcount = (size_t)DXFER_BIGCOUNT;
int nerrors = 0;
int mpi_size, mpi_rank;
@@ -51,6 +53,8 @@ static void coll_chunktest(const char* filename, int chunk_factor, int select_fa
int api_option, int file_selection, int mem_selection, int mode);
hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);
+hsize_t H5_mpio_set_bigio_count(hsize_t new_count);
+
/*
* Setup the coordinates for point selection.
*/
@@ -478,22 +482,19 @@ static void
dataset_big_write(void)
{
- hid_t xfer_plist; /* Dataset transfer properties list */
- hid_t sid; /* Dataspace ID */
- hid_t file_dataspace; /* File dataspace ID */
- hid_t mem_dataspace; /* memory dataspace ID */
+ hid_t xfer_plist; /* Dataset transfer properties list */
+ hid_t sid; /* Dataspace ID */
+ hid_t file_dataspace; /* File dataspace ID */
+ hid_t mem_dataspace; /* memory dataspace ID */
hid_t dataset;
- hid_t datatype; /* Datatype ID */
- hsize_t dims[RANK]; /* dataset dim sizes */
- hsize_t start[RANK]; /* for hyperslab setting */
- hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
- hsize_t block[RANK]; /* for hyperslab setting */
+ hsize_t dims[RANK]; /* dataset dim sizes */
+ hsize_t start[RANK]; /* for hyperslab setting */
+ hsize_t count[RANK],stride[RANK]; /* for hyperslab setting */
+ hsize_t block[RANK]; /* for hyperslab setting */
hsize_t *coords = NULL;
- int i;
- herr_t ret; /* Generic return value */
- hid_t fid; /* HDF5 file ID */
- hid_t acc_tpl; /* File access templates */
- hsize_t h;
+ herr_t ret; /* Generic return value */
+ hid_t fid; /* HDF5 file ID */
+ hid_t acc_tpl; /* File access templates */
size_t num_points;
B_DATATYPE * wdata;
@@ -806,8 +807,6 @@ dataset_big_read(void)
hsize_t start[RANK]; /* for hyperslab setting */
hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */
hsize_t block[RANK]; /* for hyperslab setting */
- int i,j,k;
- hsize_t h;
size_t num_points;
hsize_t *coords = NULL;
herr_t ret; /* Generic return value */
@@ -1120,6 +1119,63 @@ dataset_big_read(void)
} /* dataset_large_readAll */
+static void
+single_rank_independent_io(void)
+{
+ if (mpi_rank == 0)
+ HDprintf("single_rank_independent_io\n");
+
+ if (MAINPROCESS) {
+ hsize_t dims[] = { LARGE_DIM };
+ hid_t file_id = -1;
+ hid_t fapl_id = -1;
+ hid_t dset_id = -1;
+ hid_t fspace_id = -1;
+ hid_t mspace_id = -1;
+ void *data = NULL;
+
+ fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+ VRFY((fapl_id >= 0), "H5P_FILE_ACCESS");
+
+ H5Pset_fapl_mpio(fapl_id, MPI_COMM_SELF, MPI_INFO_NULL);
+ file_id = H5Fcreate(FILENAME[1], H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+ VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+ fspace_id = H5Screate_simple(1, dims, NULL);
+ VRFY((fspace_id >= 0), "H5Screate_simple fspace_id succeeded");
+
+ /*
+ * Create and write to a >2GB dataset from a single rank.
+ */
+ dset_id = H5Dcreate2(file_id, "test_dset", H5T_NATIVE_INT, fspace_id,
+ H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+ VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+ data = malloc(LARGE_DIM * sizeof(int));
+
+ if (mpi_rank == 0)
+ H5Sselect_all(fspace_id);
+ else
+ H5Sselect_none(fspace_id);
+
+ dims[0] = LARGE_DIM;
+ mspace_id = H5Screate_simple(1, dims, NULL);
+ VRFY((mspace_id >= 0), "H5Screate_simple mspace_id succeeded");
+ H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, H5P_DEFAULT, data);
+
+ free(data);
+ H5Sclose(mspace_id);
+ H5Sclose(fspace_id);
+ H5Pclose(fapl_id);
+ H5Dclose(dset_id);
+ H5Fclose(file_id);
+
+ HDremove(FILENAME[1]);
+
+ }
+ MPI_Barrier(MPI_COMM_WORLD);
+}
/*
* Create the appropriate File access property list
@@ -1395,7 +1451,6 @@ coll_chunktest(const char* filename,
size_t num_points; /* for point selection */
hsize_t *coords = NULL; /* for point selection */
- int i;
/* Create the data space */
@@ -1873,7 +1928,7 @@ int main(int argc, char **argv)
int ExpressMode = 0;
hsize_t newsize = 1048576;
/* Set the bigio processing limit to be 'newsize' bytes */
- hsize_t oldsize = H5S_mpio_set_bigio_count(newsize);
+ hsize_t oldsize = H5_mpio_set_bigio_count(newsize);
/* Having set the bigio handling to a size that is manageable,
* we'll set our 'bigcount' variable to be 2X that limit so
@@ -1918,6 +1973,8 @@ int main(int argc, char **argv)
coll_chunk2();
MPI_Barrier(MPI_COMM_WORLD);
coll_chunk3();
+ MPI_Barrier(MPI_COMM_WORLD);
+ single_rank_independent_io();
}
/* turn off alarm */
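For scale: the new single_rank_independent_io() test has rank 0 write LARGE_DIM = 1,610,612,736 native ints on its own, which (assuming 4-byte ints) is 6,442,450,944 bytes, roughly 6 GiB and about three times the 2,147,483,647-byte limit of a 32-bit element count, so it exercises exactly the >2GB independent write path fixed in H5FD__mpio_write(). main() instead lowers the switch point with H5_mpio_set_bigio_count(1048576) before the other tests run, so the big-I/O type-construction paths are hit without allocating multi-gigabyte buffers.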
diff --git a/testpar/t_file.c b/testpar/t_file.c
index 204095b..99ac189 100644
--- a/testpar/t_file.c
+++ b/testpar/t_file.c
@@ -145,7 +145,7 @@ test_page_buffer_access(void)
ret = H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 1, (hsize_t)0);
VRFY((ret == 0), "");
- ret = H5Pset_file_space_page_size(fcpl, sizeof(int)*100);
+ ret = H5Pset_file_space_page_size(fcpl, sizeof(int)*128);
VRFY((ret == 0), "");
ret = H5Pset_page_buffer_size(fapl, sizeof(int)*100000, 0, 0);
VRFY((ret == 0), "");
@@ -180,7 +180,6 @@ test_page_buffer_access(void)
data[i] = -1;
if(MAINPROCESS) {
hid_t fapl_self = H5I_INVALID_HID;
-
fapl_self = create_faccess_plist(MPI_COMM_SELF, MPI_INFO_NULL, facc_type);
ret = H5Pset_page_buffer_size(fapl_self, sizeof(int)*1000, 0, 0);
@@ -433,7 +432,7 @@ create_file(const char *filename, hid_t fcpl, hid_t fapl, int metadata_write_str
hsize_t dims[RANK], i;
hsize_t num_elements;
int k;
- char dset_name[10];
+ char dset_name[20];
H5F_t *f = NULL;
H5C_t *cache_ptr = NULL;
H5AC_cache_config_t config;
@@ -590,7 +589,7 @@ open_file(const char *filename, hid_t fapl, int metadata_write_strategy,
hsize_t block[RANK];
int i, k, ndims;
hsize_t num_elements;
- char dset_name[10];
+ char dset_name[20];
H5F_t *f = NULL;
H5C_t *cache_ptr = NULL;
H5AC_cache_config_t config;
@@ -665,8 +664,8 @@ open_file(const char *filename, hid_t fapl, int metadata_write_strategy,
ndims = H5Sget_simple_extent_dims(sid, dims, NULL);
VRFY((ndims == 2), "H5Sget_simple_extent_dims succeeded");
- VRFY(dims[0] == ROW_FACTOR*mpi_size, "Wrong dataset dimensions");
- VRFY(dims[1] == COL_FACTOR*mpi_size, "Wrong dataset dimensions");
+ VRFY(dims[0] == (hsize_t)(ROW_FACTOR*mpi_size), "Wrong dataset dimensions");
+ VRFY(dims[1] == (hsize_t)(COL_FACTOR*mpi_size), "Wrong dataset dimensions");
ret = H5Sselect_hyperslab(sid, H5S_SELECT_SET, start, stride, count, block);
VRFY((ret >= 0), "H5Sset_hyperslab succeeded");
@@ -679,7 +678,7 @@ open_file(const char *filename, hid_t fapl, int metadata_write_strategy,
ret = H5Sclose(sid);
VRFY((ret == 0), "");
- for (i=0; i < num_elements; i++)
+ for (i=0; i < (int)num_elements; i++)
VRFY((data_array[i] == mpi_rank+1), "Dataset Verify failed");
}