From 4284401fdeb5621e00a8b218b06d49e107b8821c Mon Sep 17 00:00:00 2001
From: Richard Warren
Date: Tue, 15 Oct 2019 09:31:24 -0400
Subject: Initial 2GB port from develop to the 1_12 branch

---
 src/H5FDmpio.c    |  37 ++++++++---
 src/H5Smpio.c     | 180 ++++++++----------------------------------------------
 src/H5Sprivate.h  |   1 -
 src/H5mpi.c       | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++
 testpar/t_bigio.c |  99 +++++++++++++++++++++++-------
 5 files changed, 297 insertions(+), 187 deletions(-)

diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 71e9fe1..11f0411 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -22,15 +22,15 @@
 #include "H5FDdrvr_module.h" /* This source code file is part of the H5FD driver module */
 
-#include "H5private.h"   /* Generic Functions */
+#include "H5private.h"      /* Generic Functions */
 #include "H5CXprivate.h"    /* API Contexts */
-#include "H5Dprivate.h"  /* Dataset functions */
-#include "H5Eprivate.h"  /* Error handling */
-#include "H5Fprivate.h"  /* File access */
-#include "H5FDprivate.h" /* File drivers */
-#include "H5FDmpi.h"     /* MPI-based file drivers */
-#include "H5Iprivate.h"  /* IDs */
-#include "H5MMprivate.h" /* Memory management */
+#include "H5Dprivate.h"     /* Dataset functions */
+#include "H5Eprivate.h"     /* Error handling */
+#include "H5Fprivate.h"     /* File access */
+#include "H5FDprivate.h"    /* File drivers */
+#include "H5FDmpi.h"        /* MPI-based file drivers */
+#include "H5Iprivate.h"     /* IDs */
+#include "H5MMprivate.h"    /* Memory management */
 #include "H5Pprivate.h"     /* Property lists */
 
 #ifdef H5_HAVE_PARALLEL
@@ -1324,6 +1324,7 @@ done:
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5FD__mpio_read() */
 
+
 /*-------------------------------------------------------------------------
  * Function:    H5FD__mpio_write
@@ -1366,6 +1367,7 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
 #endif
     int             size_i;
     hbool_t         use_view_this_time = FALSE;
+    hbool_t         derived_type = FALSE;
    H5FD_mpio_xfer_t xfer_mode;     /* I/O transfer mode */
     herr_t          ret_value = SUCCEED;
 
@@ -1391,8 +1393,6 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
     if(H5FD_mpi_haddr_to_MPIOff(addr, &mpi_off) < 0)
         HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr to MPI off")
     size_i = (int)size;
-    if((hsize_t)size_i != size)
-        HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size to size_i")
 
 #ifdef H5FDmpio_DEBUG
     if(H5FD_mpio_Debug[(int)'w'])
@@ -1430,6 +1430,20 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
          */
         mpi_off = 0;
     } /* end if */
+    else if(size != (hsize_t)size_i) {
+        /* If we reach this point, the transfer size exceeds the 2GB limit:
+         * the input size_t value cannot fit into an int. We can get around
+         * that limitation by creating a derived datatype that covers the
+         * whole buffer and then setting the integer size (or element count)
+         * to 1 when using the derived type.
+         */
+
+        if (H5_mpio_create_large_type(size, 0, MPI_BYTE, &buf_type) < 0)
+            HGOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, FAIL, "can't create MPI-I/O datatype")
+
+        derived_type = TRUE;
+        size_i = 1;
+    }
 
     /* Write the data.
      */
     if(use_view_this_time) {
@@ -1506,6 +1520,9 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id,
         file->local_eof = addr + (haddr_t)bytes_written;
 
 done:
+    if(derived_type) {
+        MPI_Type_free(&buf_type);
+    }
 #ifdef H5FDmpio_DEBUG
     if(H5FD_mpio_Debug[(int)'t'])
         HDfprintf(stdout, "%s: Leaving, proc %d: ret_value = %d\n", FUNC, file->mpi_rank, ret_value );
 
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index aeec566..9112d24 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -42,11 +42,10 @@
 /* Local Macros */
 /****************/
 #define H5S_MPIO_INITIAL_ALLOC_COUNT 256
-#define TWO_GIG_LIMIT 2147483648
-#ifndef H5S_MAX_MPI_COUNT
-#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */
-#endif
+/*******************/
+/* Local Variables */
+/*******************/
 
 /******************/
 /* Local Typedefs */
 /******************/
@@ -88,8 +87,6 @@ static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list);
 static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
     size_t elmt_size, const MPI_Datatype *elmt_type, MPI_Datatype *span_type,
     H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen);
-static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
-    MPI_Datatype old_type, MPI_Datatype *new_type);
 
 
 /*****************************/
@@ -102,40 +99,9 @@ static herr_t H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_
 /*********************/
 
 
-/*******************/
-/* Local Variables */
-/*******************/
-static hsize_t bigio_count = H5S_MAX_MPI_COUNT;
-
 /* Declare a free list to manage the H5S_mpio_mpitype_node_t struct */
 H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t);
 
-
-
-/*-------------------------------------------------------------------------
- * Function:  H5S_mpio_set_bigio_count
- *
- * Purpose:   Allow us to programatically change the switch point
- *            when we utilize derived datatypes. This is of
- *            particular interest for allowing nightly testing
- *
- * Return:    The current/previous value of bigio_count.
- *
- * Programmer: Richard Warren, March 10, 2017
- *
- *-------------------------------------------------------------------------
- */
-hsize_t
-H5S_mpio_set_bigio_count(hsize_t new_count)
-{
-    hsize_t orig_count = bigio_count;
-
-    if((new_count > 0) && (new_count < TWO_GIG_LIMIT))
-        bigio_count = new_count;
-
-    return orig_count;
-} /* end H5S_mpio_set_bigio_count() */
-
 /*-------------------------------------------------------------------------
  * Function:  H5S__mpio_all_type
@@ -160,6 +126,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
     hsize_t total_bytes;
     hssize_t snelmts;           /* Total number of elmts (signed) */
     hsize_t nelmts;             /* Total number of elmts */
+    hsize_t bigio_count;        /* Transition point to create derived type */
     herr_t ret_value = SUCCEED; /* Return value */
 
     FUNC_ENTER_STATIC
@@ -173,6 +140,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
     H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t);
 
     total_bytes = (hsize_t)elmt_size * nelmts;
+    bigio_count = H5_mpio_get_bigio_count();
 
     /* Verify that the size can be expressed as a 32 bit integer */
     if(bigio_count >= total_bytes) {
@@ -183,7 +151,7 @@ H5S__mpio_all_type(const H5S_t *space, size_t elmt_size,
     } /* end if */
     else {
         /* Create a LARGE derived datatype for this transfer */
-        if(H5S__mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
+        if(H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0)
             HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large datatype from the all selection")
         *count = 1;
         *is_derived_type = TRUE;
@@ -250,6 +218,7 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
     int *blocks = NULL;         /* Array of block sizes for MPI hindexed create call */
     hsize_t u;                  /* Local index variable */
 #endif
+    hsize_t bigio_count;        /* Transition point to create derived type */
     int mpi_code;               /* MPI error code */
     herr_t ret_value = SUCCEED; /* Return value */
 
@@ -260,6 +229,8 @@ H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points,
         HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
     elmt_type_created = TRUE;
 
+    bigio_count = H5_mpio_get_bigio_count();
+
     /* Check whether standard or BIGIO processing will be employeed */
     if(bigio_count >= num_points) {
 #if MPI_VERSION >= 3
@@ -518,7 +489,7 @@ done:
  *              selection and so the memory datatype has to be permuted using the
  *              permutation map created by the file selection.
  *
- * Note:        This routine is called from H5S_mpio_space_type(), which is
+ * Note:        This routine is called from H5S_mpio_space_type(), which is
  *              called first for the file dataspace and creates
 *
 * Return:      Non-negative on success, negative on failure.
@@ -678,6 +649,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
         hsize_t count;
     } d[H5S_MAX_RANK];
 
+    hsize_t bigio_count;        /* Transition point to create derived type */
     hsize_t offset[H5S_MAX_RANK];
     hsize_t max_xtent[H5S_MAX_RANK];
     H5S_hyper_dim_t *diminfo;   /* [rank] */
@@ -696,6 +668,7 @@ H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size,
     HDassert(space);
     HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size));
 
+    bigio_count = H5_mpio_get_bigio_count();
     /* Initialize selection iterator */
     if(H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0)
         HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
@@ -824,7 +797,7 @@ if(H5DEBUG(S)) {
         } /* end if */
         else
             /* Create the compound datatype for this operation (> 2GB) */
-            if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
+            if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0)
                 HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large inner datatype in hyper selection")
 
 /*******************************************************
@@ -878,7 +851,7 @@ if(H5DEBUG(S))
              * Again we need to check that the number of BLOCKS can fit into
              * a 32 bit integer */
             if(bigio_count < d[i].block) {
-                if(H5S__mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
+                if(H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0)
                     HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large block datatype in hyper selection")
             } /* end if */
             else
@@ -899,7 +872,7 @@ if(H5DEBUG(S))
              * we call the large type creation function to handle that
              */
             if(bigio_count < d[i].count) {
-                if(H5S__mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
+                if(H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0)
                     HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large outer datatype in hyper selection")
             } /* end if */
             /* otherwise a regular create_hvector will do */
@@ -1001,6 +974,7 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
     MPI_Datatype elmt_type;             /* MPI datatype for an element */
     hbool_t elmt_type_is_derived = FALSE;   /* Whether the element type has been created */
     MPI_Datatype span_type;             /* MPI datatype for overall span tree */
+    hsize_t bigio_count;                /* Transition point to create derived type */
     hsize_t down[H5S_MAX_RANK];         /* 'down' sizes for each dimension */
     uint64_t op_gen;                    /* Operation generation value */
     int mpi_code;                       /* MPI return code */
@@ -1014,13 +988,14 @@ H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size,
     HDassert(space->select.sel_info.hslab->span_lst);
     HDassert(space->select.sel_info.hslab->span_lst->head);
 
+    bigio_count = H5_mpio_get_bigio_count();
     /* Create the base type for an element */
     if(bigio_count >= elmt_size) {
         if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type)))
             HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
     } /* end if */
     else
-        if(H5S__mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
+        if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0)
             HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
     elmt_type_is_derived = TRUE;
 
@@ -1124,8 +1099,10 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
     H5S_mpio_mpitype_list_t *type_list, uint64_t op_gen)
 {
     H5S_hyper_span_t *span;     /* Hyperslab span to iterate with */
+    hsize_t bigio_count;        /* Transition point to create derived type */
+
     size_t alloc_count = 0;     /* Number of span tree nodes allocated at this level */
-    size_t outercount;          /* Number of span tree nodes at this level */
+    size_t outercount = 0;      /* Number of span tree nodes at this level */
     MPI_Datatype *inner_type = NULL;
     hbool_t inner_types_freed = FALSE;  /* Whether the inner_type MPI datatypes have been freed */
     int *blocklen = NULL;
@@ -1140,6 +1117,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
     HDassert(spans);
     HDassert(type_list);
 
+    bigio_count = H5_mpio_get_bigio_count();
     /* Check if we've visited this span tree before */
     if(spans->op_gen != op_gen) {
         H5S_mpio_mpitype_node_t *type_node;    /* Pointer to new node in MPI data type list */
@@ -1185,7 +1163,7 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
                 H5_CHECK_OVERFLOW(nelmts, hsize_t, int)
                 blocklen[outercount] = (int)nelmts;
 
-                if(bigio_count < blocklen[outercount])
+                if(bigio_count < (hsize_t)blocklen[outercount])
                     large_block = TRUE; /* at least one block type is large, so set this flag to true */
 
                 span = span->next;
@@ -1202,8 +1180,8 @@ H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down,
                     MPI_Datatype temp_type = MPI_DATATYPE_NULL;
 
                     /* create the block type from elmt_type while checking the 32 bit int limit */
-                    if(blocklen[u] > bigio_count) {
-                        if(H5S__mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
+                    if((hsize_t)(blocklen[u]) > bigio_count) {
+                        if(H5_mpio_create_large_type(blocklen[u], 0, *elmt_type, &temp_type) < 0)
                             HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection")
                     } /* end if */
                     else
@@ -1453,113 +1431,5 @@ done:
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5S_mpio_space_type() */
 
-
-/*-------------------------------------------------------------------------
- * Function:  H5S__mpio_create_large_type
- *
- * Purpose:   Create a large datatype of size larger than what a 32 bit integer
- *            can hold.
- *
- * Return:    Non-negative on success, negative on failure.
- *
- *            *new_type  the new datatype created
- *
- * Programmer: Mohamad Chaarawi
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5S__mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
-    MPI_Datatype old_type, MPI_Datatype *new_type)
-{
-    int num_big_types;          /* num times the 2G datatype will be repeated */
-    int remaining_bytes;        /* the number of bytes left that can be held in an int value */
-    hsize_t leftover;
-    int block_len[2];
-    int mpi_code;               /* MPI return code */
-    MPI_Datatype inner_type, outer_type, leftover_type, type[2];
-    MPI_Aint disp[2], old_extent;
-    herr_t ret_value = SUCCEED; /* Return value */
-
-    FUNC_ENTER_STATIC
-
-    /* Calculate how many Big MPI datatypes are needed to represent the buffer */
-    num_big_types = (int)(num_elements/bigio_count);
-    leftover = num_elements - num_big_types * (hsize_t)bigio_count;
-    H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
-
-    /* Create a contiguous datatype of size equal to the largest
-     * number that a 32 bit integer can hold x size of old type.
-     * If the displacement is 0, then the type is contiguous, otherwise
-     * use type_hvector to create the type with the displacement provided
-     */
-    if (0 == stride_bytes) {
-        if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, old_type, &inner_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
-    } /* end if */
-    else
-        if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(bigio_count, 1, stride_bytes, old_type, &inner_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
-    /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
-     * If a stride is present, use hvector type
-     */
-    if(0 == stride_bytes) {
-        if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
-    } /* end if */
-    else
-        if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
-    MPI_Type_free(&inner_type);
-
-    /* If there is a remaining part create a contiguous/vector datatype and then
-     * use a struct datatype to encapsulate everything.
-     */
-    if(remaining_bytes) {
-        if(stride_bytes == 0) {
-            if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
-        } /* end if */
-        else
-            if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
-
-        /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
-         * so we're using the MPI-2 version even though we don't need the lb
-         * value.
- */ - { - MPI_Aint unused_lb_arg; - MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent); - } - - /* Set up the arguments for MPI_Type_struct constructor */ - type[0] = outer_type; - type[1] = leftover_type; - block_len[0] = 1; - block_len[1] = 1; - disp[0] = 0; - disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count; - - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) - - MPI_Type_free(&outer_type); - MPI_Type_free(&leftover_type); - } /* end if */ - else - /* There are no remaining bytes so just set the new type to - * the outer type created */ - *new_type = outer_type; - - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5S__mpio_create_large_type() */ - #endif /* H5_HAVE_PARALLEL */ diff --git a/src/H5Sprivate.h b/src/H5Sprivate.h index 0a9d2e7..3d68de0 100644 --- a/src/H5Sprivate.h +++ b/src/H5Sprivate.h @@ -307,7 +307,6 @@ H5_DLL herr_t H5S_select_iter_release(H5S_sel_iter_t *sel_iter); H5_DLL herr_t H5S_sel_iter_close(H5S_sel_iter_t *sel_iter); #ifdef H5_HAVE_PARALLEL -H5_DLL hsize_t H5S_mpio_set_bigio_count(hsize_t new_count); H5_DLL herr_t H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, /* out: */ MPI_Datatype *new_type, int *count, diff --git a/src/H5mpi.c b/src/H5mpi.c index d48790b..f01e16a 100644 --- a/src/H5mpi.c +++ b/src/H5mpi.c @@ -22,6 +22,64 @@ #ifdef H5_HAVE_PARALLEL + +/****************/ +/* Local Macros */ +/****************/ +#define TWO_GIG_LIMIT (1 << 31) +#ifndef H5_MAX_MPI_COUNT +#define H5_MAX_MPI_COUNT (1 << 30) +#endif + +/*******************/ +/* Local Variables */ +/*******************/ +static hsize_t bigio_count = H5_MAX_MPI_COUNT; + + +/*------------------------------------------------------------------------- + * Function: H5_mpio_set_bigio_count + * + * Purpose: Allow us to programatically change the switch point + * when we utilize derived datatypes. This is of + * particular interest for allowing nightly testing + * + * Return: The current/previous value of bigio_count. + * + * Programmer: Richard Warren, March 10, 2017 + * + *------------------------------------------------------------------------- + */ +hsize_t +H5_mpio_set_bigio_count(hsize_t new_count) +{ + hsize_t orig_count = bigio_count; + + if((new_count > 0) && (new_count < (hsize_t)TWO_GIG_LIMIT)) { + bigio_count = new_count; + } + return orig_count; +} /* end H5_mpio_set_bigio_count() */ + + +/*------------------------------------------------------------------------- + * Function: H5_mpio_get_bigio_count + * + * Purpose: Allow other HDF5 library functions to access + * the current value for bigio_count. + * + * Return: The current/previous value of bigio_count. + * + * Programmer: Richard Warren, October 7, 2019 + * + *------------------------------------------------------------------------- + */ +hsize_t +H5_mpio_get_bigio_count() +{ + return bigio_count; +} + /*------------------------------------------------------------------------- * Function: H5_mpi_comm_dup @@ -392,5 +450,114 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5_mpi_info_cmp() */ + +/*------------------------------------------------------------------------- + * Function: H5_mpio_create_large_type + * + * Purpose: Create a large datatype of size larger than what a 32 bit integer + * can hold. + * + * Return: Non-negative on success, negative on failure. 
+ *
+ *            *new_type  the new datatype created
+ *
+ * Programmer: Mohamad Chaarawi
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes,
+    MPI_Datatype old_type, MPI_Datatype *new_type)
+{
+    int num_big_types;          /* num times the 2G datatype will be repeated */
+    int remaining_bytes;        /* the number of bytes left that can be held in an int value */
+    hsize_t leftover;
+    int block_len[2];
+    int mpi_code;               /* MPI return code */
+    MPI_Datatype inner_type, outer_type, leftover_type, type[2];
+    MPI_Aint disp[2], old_extent;
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    /* Calculate how many Big MPI datatypes are needed to represent the buffer */
+    num_big_types = (int)(num_elements/bigio_count);
+    leftover = num_elements - num_big_types * (hsize_t)bigio_count;
+    H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t);
+
+    /* Create a contiguous datatype of size equal to the largest
+     * number that a 32 bit integer can hold x size of old type.
+     * If the displacement is 0, then the type is contiguous, otherwise
+     * use type_hvector to create the type with the displacement provided
+     */
+    if (0 == stride_bytes) {
+        if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)bigio_count, old_type, &inner_type)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+    } /* end if */
+    else
+        if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)bigio_count, 1, stride_bytes, old_type, &inner_type)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+    /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part)
+     * If a stride is present, use hvector type
+     */
+    if(0 == stride_bytes) {
+        if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, inner_type, &outer_type)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+    } /* end if */
+    else
+        if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector(num_big_types, 1, stride_bytes, inner_type, &outer_type)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+    MPI_Type_free(&inner_type);
+
+    /* If there is a remaining part create a contiguous/vector datatype and then
+     * use a struct datatype to encapsulate everything.
+     */
+    if(remaining_bytes) {
+        if(stride_bytes == 0) {
+            if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(remaining_bytes, old_type, &leftover_type)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+        } /* end if */
+        else
+            if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)(num_elements - (hsize_t)num_big_types * bigio_count), 1, stride_bytes, old_type, &leftover_type)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+
+        /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default,
+         * so we're using the MPI-2 version even though we don't need the lb
+         * value.
+ */ + { + MPI_Aint unused_lb_arg; + MPI_Type_get_extent(old_type, &unused_lb_arg, &old_extent); + } + + /* Set up the arguments for MPI_Type_struct constructor */ + type[0] = outer_type; + type[1] = leftover_type; + block_len[0] = 1; + block_len[1] = 1; + disp[0] = 0; + disp[1] = (old_extent + stride_bytes) * num_big_types * (MPI_Aint)bigio_count; + + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + + MPI_Type_free(&outer_type); + MPI_Type_free(&leftover_type); + } /* end if */ + else + /* There are no remaining bytes so just set the new type to + * the outer type created */ + *new_type = outer_type; + + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5_mpio_create_large_type() */ + + #endif /* H5_HAVE_PARALLEL */ diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c index 9ca077c..fe96c83 100644 --- a/testpar/t_bigio.c +++ b/testpar/t_bigio.c @@ -4,7 +4,8 @@ #include "H5Dprivate.h" /* For Chunk tests */ /* FILENAME and filenames must have the same number of names */ -const char *FILENAME[2]={ "bigio_test.h5", +const char *FILENAME[3]={ "bigio_test.h5", + "single_rank_independent_io.h5", NULL }; @@ -29,7 +30,8 @@ const char *FILENAME[2]={ "bigio_test.h5", #define DATASET5 "DSET5" #define DXFER_COLLECTIVE_IO 0x1 /* Collective IO*/ #define DXFER_INDEPENDENT_IO 0x2 /* Independent IO collectively */ -#define DXFER_BIGCOUNT 536870916 +#define DXFER_BIGCOUNT (1 < 29) +#define LARGE_DIM 1610612736 #define HYPER 1 #define POINT 2 @@ -40,7 +42,7 @@ typedef hsize_t B_DATATYPE; int facc_type = FACC_MPIO; /*Test file access type */ int dxfer_coll_type = DXFER_COLLECTIVE_IO; -size_t bigcount = DXFER_BIGCOUNT; +size_t bigcount = (size_t)DXFER_BIGCOUNT; int nerrors = 0; int mpi_size, mpi_rank; @@ -51,6 +53,8 @@ static void coll_chunktest(const char* filename, int chunk_factor, int select_fa int api_option, int file_selection, int mem_selection, int mode); hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type); +hsize_t H5_mpio_set_bigio_count(hsize_t new_count); + /* * Setup the coordinates for point selection. 
  */
@@ -478,22 +482,19 @@ static void
 dataset_big_write(void)
 {
-    hid_t xfer_plist;       /* Dataset transfer properties list */
-    hid_t sid;              /* Dataspace ID */
-    hid_t file_dataspace;   /* File dataspace ID */
-    hid_t mem_dataspace;    /* memory dataspace ID */
+    hid_t xfer_plist;                   /* Dataset transfer properties list */
+    hid_t sid;                          /* Dataspace ID */
+    hid_t file_dataspace;               /* File dataspace ID */
+    hid_t mem_dataspace;                /* memory dataspace ID */
     hid_t dataset;
-    hid_t datatype;         /* Datatype ID */
-    hsize_t dims[RANK];     /* dataset dim sizes */
-    hsize_t start[RANK];    /* for hyperslab setting */
-    hsize_t count[RANK], stride[RANK];  /* for hyperslab setting */
-    hsize_t block[RANK];    /* for hyperslab setting */
+    hsize_t dims[RANK];                 /* dataset dim sizes */
+    hsize_t start[RANK];                /* for hyperslab setting */
+    hsize_t count[RANK],stride[RANK];   /* for hyperslab setting */
+    hsize_t block[RANK];                /* for hyperslab setting */
     hsize_t *coords = NULL;
-    int i;
-    herr_t ret;             /* Generic return value */
-    hid_t fid;              /* HDF5 file ID */
-    hid_t acc_tpl;          /* File access templates */
-    hsize_t h;
+    herr_t ret;                         /* Generic return value */
+    hid_t fid;                          /* HDF5 file ID */
+    hid_t acc_tpl;                      /* File access templates */
     size_t num_points;
     B_DATATYPE * wdata;
 
@@ -806,8 +807,6 @@ dataset_big_read(void)
     hsize_t start[RANK];                /* for hyperslab setting */
     hsize_t count[RANK], stride[RANK];  /* for hyperslab setting */
     hsize_t block[RANK];                /* for hyperslab setting */
-    int i,j,k;
-    hsize_t h;
     size_t num_points;
     hsize_t *coords = NULL;
     herr_t ret;                         /* Generic return value */
@@ -1120,6 +1119,63 @@ dataset_big_read(void)
 
 } /* dataset_large_readAll */
 
+static void
+single_rank_independent_io(void)
+{
+    if (mpi_rank == 0)
+        HDprintf("single_rank_independent_io\n");
+
+    if (MAINPROCESS) {
+        hsize_t dims[] = { LARGE_DIM };
+        hid_t file_id = -1;
+        hid_t fapl_id = -1;
+        hid_t dset_id = -1;
+        hid_t fspace_id = -1;
+        hid_t mspace_id = -1;
+        void *data = NULL;
+
+        fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((fapl_id >= 0), "H5P_FILE_ACCESS");
+
+        H5Pset_fapl_mpio(fapl_id, MPI_COMM_SELF, MPI_INFO_NULL);
+        file_id = H5Fcreate(FILENAME[1], H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+        VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+        fspace_id = H5Screate_simple(1, dims, NULL);
+        VRFY((fspace_id >= 0), "H5Screate_simple fspace_id succeeded");
+
+        /*
+         * Create and write to a >2GB dataset from a single rank.
+         */
+        dset_id = H5Dcreate2(file_id, "test_dset", H5T_NATIVE_INT, fspace_id,
+                             H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+
+        VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+        data = malloc(LARGE_DIM * sizeof(int));
+
+        if (mpi_rank == 0)
+            H5Sselect_all(fspace_id);
+        else
+            H5Sselect_none(fspace_id);
+
+        dims[0] = LARGE_DIM;
+        mspace_id = H5Screate_simple(1, dims, NULL);
+        VRFY((mspace_id >= 0), "H5Screate_simple mspace_id succeeded");
+        H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, H5P_DEFAULT, data);
+
+        free(data);
+        H5Sclose(mspace_id);
+        H5Sclose(fspace_id);
+        H5Pclose(fapl_id);
+        H5Dclose(dset_id);
+        H5Fclose(file_id);
+
+        HDremove(FILENAME[1]);
+
+    }
+    MPI_Barrier(MPI_COMM_WORLD);
+}
 
 /*
  * Create the appropriate File access property list
@@ -1395,7 +1451,6 @@ coll_chunktest(const char* filename,
 
     size_t num_points;          /* for point selection */
     hsize_t *coords = NULL;     /* for point selection */
-    int i;
 
     /* Create the data space */
 
@@ -1873,7 +1928,7 @@ int main(int argc, char **argv)
     int ExpressMode = 0;
     hsize_t newsize = 1048576;
     /* Set the bigio processing limit to be 'newsize' bytes */
-    hsize_t oldsize = H5S_mpio_set_bigio_count(newsize);
+    hsize_t oldsize = H5_mpio_set_bigio_count(newsize);
 
     /* Having set the bigio handling to a size that is managable,
      * we'll set our 'bigcount' variable to be 2X that limit so
@@ -1918,6 +1973,8 @@ int main(int argc, char **argv)
         coll_chunk2();
         MPI_Barrier(MPI_COMM_WORLD);
         coll_chunk3();
+        MPI_Barrier(MPI_COMM_WORLD);
+        single_rank_independent_io();
     }
 
     /* turn off alarm */
--
cgit v0.12
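
A note on the technique for readers following this port: H5FD__mpio_write()
works around MPI's 32-bit element count by describing the whole buffer with a
single derived datatype and passing a count of 1, and H5_mpio_create_large_type()
builds that datatype from <= 1GB contiguous chunks plus a leftover piece glued
together with MPI_Type_create_struct(). The standalone sketch below shows the
same construction for the contiguous case (stride_bytes == 0 in the patch); it
is an illustration rather than HDF5 code, and the harness names (write_large,
CHUNK) are invented for the example.

#include <limits.h>
#include <mpi.h>
#include <stddef.h>

/* Write 'size' bytes at offset 'off', working around the int count limit. */
static int write_large(MPI_File fh, MPI_Offset off, const void *buf, size_t size)
{
    MPI_Datatype buf_type = MPI_BYTE;
    const size_t CHUNK = (size_t)1 << 30;   /* mirrors H5_MAX_MPI_COUNT */
    int count, rc, derived = 0;

    if (size <= (size_t)INT_MAX)
        count = (int)size;                  /* fits in an int: plain write */
    else {
        int nchunks  = (int)(size / CHUNK);              /* full 1GB chunks */
        int leftover = (int)(size - (size_t)nchunks * CHUNK);
        MPI_Datatype chunk_type, body_type;

        MPI_Type_contiguous((int)CHUNK, MPI_BYTE, &chunk_type);
        MPI_Type_contiguous(nchunks, chunk_type, &body_type);

        if (leftover > 0) {
            /* Glue the chunked body and the < 1GB tail into one struct
             * type, as H5_mpio_create_large_type() does for stride 0. */
            MPI_Datatype types[2];
            int          blocklens[2] = { 1, 1 };
            MPI_Aint     disps[2];

            disps[0] = 0;
            disps[1] = (MPI_Aint)nchunks * (MPI_Aint)CHUNK;
            types[0] = body_type;
            MPI_Type_contiguous(leftover, MPI_BYTE, &types[1]);
            MPI_Type_create_struct(2, blocklens, disps, types, &buf_type);
            MPI_Type_free(&types[1]);
            MPI_Type_free(&body_type);
        }
        else
            buf_type = body_type;           /* size is a multiple of CHUNK */

        MPI_Type_commit(&buf_type);
        MPI_Type_free(&chunk_type);
        derived = 1;
        count = 1;                          /* one instance of the big type */
    }

    rc = MPI_File_write_at(fh, off, buf, count, buf_type, MPI_STATUS_IGNORE);
    if (derived)
        MPI_Type_free(&buf_type);
    return rc;
}

The same limit can be exercised from the test side without multi-gigabyte
buffers: as main() in t_bigio.c does above, H5_mpio_set_bigio_count(1048576)
lowers the switch point so the derived-datatype paths run with small data.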