diff options
author | Quincey Koziol <koziol@koziol.gov> | 2020-06-26 23:57:38 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@koziol.gov> | 2020-06-26 23:57:38 (GMT) |
commit | e767f44e953047297fece364218147c908e8b585 (patch) | |
tree | d4b6909f14b78bb732b6947adc46ac3cd3ca54f6 /src/H5Smpio.c | |
parent | 949649a2b6e00297cbdc49437e70a27e455e92d2 (diff) | |
parent | a08ab621febde7b09e4d86eab80cb029c123e9f6 (diff) | |
download | hdf5-e767f44e953047297fece364218147c908e8b585.zip hdf5-e767f44e953047297fece364218147c908e8b585.tar.gz hdf5-e767f44e953047297fece364218147c908e8b585.tar.bz2 |
Merge remote-tracking branch 'origin/develop' into monotonic_timer
Diffstat (limited to 'src/H5Smpio.c')
-rw-r--r-- | src/H5Smpio.c | 1371 |
1 files changed, 656 insertions, 715 deletions
diff --git a/src/H5Smpio.c b/src/H5Smpio.c index 46f7a59..33dd492 100644 --- a/src/H5Smpio.c +++ b/src/H5Smpio.c @@ -14,90 +14,101 @@ /* * Programmer: rky 980813 * - * Purpose: Functions to read/write directly between app buffer and file. + * Purpose: Create MPI data types for HDF5 selections. * - * Beware of the ifdef'ed print statements. - * I didn't make them portable. */ +/****************/ +/* Module Setup */ +/****************/ + #include "H5Smodule.h" /* This source code file is part of the H5S module */ +/***********/ +/* Headers */ +/***********/ #include "H5private.h" /* Generic Functions */ #include "H5Dprivate.h" /* Datasets */ #include "H5Eprivate.h" /* Error handling */ -#include "H5Fprivate.h" /* File access */ -#include "H5FDprivate.h" /* File drivers */ -#include "H5Iprivate.h" /* IDs */ +#include "H5FLprivate.h" /* Free Lists */ #include "H5MMprivate.h" /* Memory management */ -#include "H5Oprivate.h" /* Object headers */ -#include "H5Pprivate.h" /* Property lists */ #include "H5Spkg.h" /* Dataspaces */ #include "H5VMprivate.h" /* Vector and array functions */ #ifdef H5_HAVE_PARALLEL -static herr_t H5S_mpio_all_type(const H5S_t *space, size_t elmt_size, +/****************/ +/* Local Macros */ +/****************/ +#define H5S_MPIO_INITIAL_ALLOC_COUNT 256 + +/*******************/ +/* Local Variables */ +/*******************/ + +/******************/ +/* Local Typedefs */ +/******************/ + +/* Node in linked list of MPI data types created during traversal of irregular hyperslab selection */ +typedef struct H5S_mpio_mpitype_node_t { + MPI_Datatype type; /* MPI Datatype */ + struct H5S_mpio_mpitype_node_t *next; /* Pointer to next node in list */ +} H5S_mpio_mpitype_node_t; + +/* List to track MPI data types generated during traversal of irregular hyperslab selection */ +typedef struct H5S_mpio_mpitype_list_t { + H5S_mpio_mpitype_node_t *head; /* Pointer to head of list */ + H5S_mpio_mpitype_node_t *tail; /* Pointer to tail of list */ +} H5S_mpio_mpitype_list_t; + + +/********************/ +/* Local Prototypes */ +/********************/ +static herr_t H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); -static herr_t H5S_mpio_none_type(MPI_Datatype *new_type, int *count, +static herr_t H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); -static herr_t H5S_mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, +static herr_t H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, MPI_Aint *disp, MPI_Datatype *new_type); -static herr_t H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, +static herr_t H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map, hbool_t *is_permuted); -static herr_t H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, +static herr_t H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute_map, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); -static herr_t H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, +static herr_t H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); -static herr_t H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, +static herr_t H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); -static herr_t H5S_obtain_datatype(const hsize_t down[], H5S_hyper_span_t* span, - const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size); -static herr_t H5S_mpio_create_large_type (hsize_t, MPI_Aint, MPI_Datatype , MPI_Datatype *); +static herr_t H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list); +static herr_t H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, + size_t elmt_size, const MPI_Datatype *elmt_type, MPI_Datatype *span_type, + H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen); -#define H5S_MPIO_INITIAL_ALLOC_COUNT 256 +/*****************************/ +/* Library Private Variables */ +/*****************************/ -#define TWO_GIG_LIMIT 2147483648 -#ifndef H5S_MAX_MPI_COUNT -#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */ -#endif +/*********************/ +/* Package Variables */ +/*********************/ -static hsize_t bigio_count = H5S_MAX_MPI_COUNT; -/*------------------------------------------------------------------------- - * Function: H5S_mpio_set_bigio_count - * - * Purpose: Allow us to programatically change the switch point - * when we utilize derived datatypes. This is of - * particular interest for allowing nightly testing - * - * Return: the current/previous value of bigio_count. - * - * Programmer: Richard Warren, March 10, 2017 - * - *------------------------------------------------------------------------- - */ -hsize_t -H5S_mpio_set_bigio_count(hsize_t new_count) -{ - hsize_t orig_count = bigio_count; - if ((new_count > 0) && (new_count < TWO_GIG_LIMIT)) { - bigio_count = new_count; - } - return orig_count; -} +/* Declare a free list to manage the H5S_mpio_mpitype_node_t struct */ +H5FL_DEFINE_STATIC(H5S_mpio_mpitype_node_t); /*------------------------------------------------------------------------- - * Function: H5S_mpio_all_type + * Function: H5S__mpio_all_type * * Purpose: Translate an HDF5 "all" selection into an MPI type. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -105,24 +116,20 @@ H5S_mpio_set_bigio_count(hsize_t new_count) * *is_derived_type 0 if MPI primitive type, 1 if derived * * Programmer: rky 980813 - * Modifications: - * Mohamad Chaarawi - * Adding support for large datatypes (beyond the limit of a - * 32 bit integer. - * * *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_all_type(const H5S_t *space, size_t elmt_size, +H5S__mpio_all_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) { hsize_t total_bytes; hssize_t snelmts; /* Total number of elmts (signed) */ hsize_t nelmts; /* Total number of elmts */ + hsize_t bigio_count; /* Transition point to create derived type */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Check args */ HDassert(space); @@ -133,34 +140,34 @@ H5S_mpio_all_type(const H5S_t *space, size_t elmt_size, H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t); total_bytes = (hsize_t)elmt_size * nelmts; + bigio_count = H5_mpi_get_bigio_count(); + /* Verify that the size can be expressed as a 32 bit integer */ if(bigio_count >= total_bytes) { - /* fill in the return values */ - *new_type = MPI_BYTE; - H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t); - *is_derived_type = FALSE; - } + /* fill in the return values */ + *new_type = MPI_BYTE; + H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t); + *is_derived_type = FALSE; + } /* end if */ else { - /* Create a LARGE derived datatype for this transfer */ - if (H5S_mpio_create_large_type (total_bytes, 0, MPI_BYTE, new_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't create a large datatype from the all selection") - } - *count = 1; - *is_derived_type = TRUE; - } + /* Create a LARGE derived datatype for this transfer */ + if(H5_mpio_create_large_type(total_bytes, 0, MPI_BYTE, new_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large datatype from the all selection") + *count = 1; + *is_derived_type = TRUE; + } /* end else */ done: FUNC_LEAVE_NOAPI(ret_value) -} /* H5S_mpio_all_type() */ +} /* H5S__mpio_all_type() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_none_type + * Function: H5S__mpio_none_type * * Purpose: Translate an HDF5 "none" selection into an MPI type. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -172,9 +179,9 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) +H5S__mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) { - FUNC_ENTER_NOAPI_NOINIT_NOERR + FUNC_ENTER_STATIC_NOERR /* fill in the return values */ *new_type = MPI_BYTE; @@ -182,15 +189,15 @@ H5S_mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) *is_derived_type = FALSE; FUNC_LEAVE_NOAPI(SUCCEED) -} /* H5S_mpio_none_type() */ +} /* H5S__mpio_none_type() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_create_point_datatype + * Function: H5S__mpio_create_point_datatype * * Purpose: Create a derived datatype for point selections. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * @@ -198,138 +205,156 @@ H5S_mpio_none_type(MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) * *------------------------------------------------------------------------- */ -static herr_t -H5S_mpio_create_point_datatype (size_t elmt_size, hsize_t num_points, - MPI_Aint *disp, MPI_Datatype *new_type) +static herr_t +H5S__mpio_create_point_datatype(size_t elmt_size, hsize_t num_points, + MPI_Aint *disp, MPI_Datatype *new_type) { MPI_Datatype elmt_type; /* MPI datatype for individual element */ hbool_t elmt_type_created = FALSE; /* Whether the element MPI datatype was created */ - int mpi_code; /* MPI error code */ + int *inner_blocks = NULL; /* Arrays for MPI datatypes when "large" datatype needed */ + MPI_Aint *inner_disps = NULL; + MPI_Datatype *inner_types = NULL; +#if MPI_VERSION < 3 int *blocks = NULL; /* Array of block sizes for MPI hindexed create call */ hsize_t u; /* Local index variable */ +#endif + hsize_t bigio_count; /* Transition point to create derived type */ + int mpi_code; /* MPI error code */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Create an MPI datatype for an element */ if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) elmt_type_created = TRUE; + bigio_count = H5_mpi_get_bigio_count(); + /* Check whether standard or BIGIO processing will be employeed */ if(bigio_count >= num_points) { #if MPI_VERSION >= 3 - /* Create an MPI datatype for the whole point selection */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) + /* Create an MPI datatype for the whole point selection */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) #else - /* Allocate block sizes for MPI datatype call */ - if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + /* Allocate block sizes for MPI datatype call */ + if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + for(u = 0; u < num_points; u++) + blocks[u] = 1; + + /* Create an MPI datatype for the whole point selection */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) +#endif + + /* Commit MPI datatype for later use */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + } /* end if */ + else { + /* use LARGE_DATATYPE:: + * We'll create an hindexed_block type for every 2G point count and then combine + * those and any remaining points into a single large datatype. + */ + int total_types, i; + int remaining_points; + int num_big_types; + hsize_t leftover; + + /* Calculate how many Big MPI datatypes are needed to represent the buffer */ + num_big_types = (int)(num_points / bigio_count); + + leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count; + H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t); + + total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types; - for(u = 0; u < num_points; u++) - blocks[u] = 1; + /* Allocate array if MPI derived types needed */ + if(NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") - /* Create an MPI datatype for the whole point selection */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + if(NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + if(NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + +#if MPI_VERSION < 3 + /* Allocate block sizes for MPI datatype call */ + if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * bigio_count))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + for(u = 0; u < bigio_count; u++) + blocks[u] = 1; +#endif + + for(i = 0; i < num_big_types; i++) { +#if MPI_VERSION >= 3 + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)bigio_count, + 1, &disp[(hsize_t)i*bigio_count], elmt_type, &inner_types[i]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code) +#else + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)bigio_count, + blocks, &disp[i*bigio_count], elmt_type, &inner_types[i]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) +#endif + inner_blocks[i] = 1; + inner_disps[i] = 0; + } /* end for*/ + + if(remaining_points) { +#if MPI_VERSION >= 3 + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(remaining_points, + 1, &disp[(hsize_t)num_big_types*bigio_count], elmt_type, &inner_types[num_big_types]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code) +#else + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)remaining_points, + blocks, &disp[num_big_types*bigio_count], elmt_type, &inner_types[num_big_types]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) #endif + inner_blocks[num_big_types] = 1; + inner_disps[num_big_types] = 0; + } /* end if */ + + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types, + inner_blocks, inner_disps, inner_types, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct", mpi_code) + for(i = 0; i < total_types; i++) + MPI_Type_free(&inner_types[i]); + + /* Commit MPI datatype for later use */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + } /* end else */ - /* Commit MPI datatype for later use */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - } - else { - /* use LARGE_DATATYPE:: - * We'll create an hindexed_block type for every 2G point count and then combine - * those and any remaining points into a single large datatype. - */ - int total_types, i; - int remaining_points; - int num_big_types; - hsize_t leftover; - - int *inner_blocks; - MPI_Aint *inner_disps; - MPI_Datatype *inner_types = NULL; - - /* Calculate how many Big MPI datatypes are needed to represent the buffer */ - num_big_types = (int)(num_points/bigio_count); - - leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count; - H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t); - - total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types; - - /* Allocate array if MPI derived types needed */ - if(NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") - - if(NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") - - if(NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") - - for(i=0 ; i<num_big_types ; i++) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(bigio_count, - 1, - &disp[i*bigio_count], - elmt_type, - &inner_types[i]))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code); - } - inner_blocks[i] = 1; - inner_disps[i] = 0; - } - - if(remaining_points) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block(remaining_points, - 1, - &disp[num_big_types*bigio_count], - elmt_type, - &inner_types[num_big_types]))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code); - } - inner_blocks[num_big_types] = 1; - inner_disps[num_big_types] = 0; - } - - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(total_types, - inner_blocks, - inner_disps, - inner_types, - new_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct", mpi_code); - } - for(i=0 ; i<total_types ; i++) - MPI_Type_free(&inner_types[i]); - - H5MM_free(inner_types); - H5MM_free(inner_blocks); - H5MM_free(inner_disps); - - /* Commit MPI datatype for later use */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - } done: if(elmt_type_created) MPI_Type_free(&elmt_type); +#if MPI_VERSION < 3 if(blocks) H5MM_free(blocks); +#endif + if(inner_types) + H5MM_free(inner_types); + if(inner_blocks) + H5MM_free(inner_blocks); + if(inner_disps) + H5MM_free(inner_disps); FUNC_LEAVE_NOAPI(ret_value) -} /* H5S_mpio_create_point_datatype() */ +} /* H5S__mpio_create_point_datatype() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_point_type + * Function: H5S__mpio_point_type * * Purpose: Translate an HDF5 "point" selection into an MPI type. * Create a permutation array to handle out-of-order point selections. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -344,7 +369,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, +H5S__mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute, hbool_t *is_permuted) { @@ -355,7 +380,7 @@ H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type hsize_t u; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Check args */ HDassert(space); @@ -377,28 +402,31 @@ H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type /* Iterate through list of elements */ curr = space->select.sel_info.pnt_lst->head; for(u = 0 ; u < num_points ; u++) { - /* calculate the displacement of the current point */ - disp[u] = H5VM_array_offset(space->extent.rank, space->extent.size, curr->pnt); - disp[u] *= elmt_size; - - /* This is a File Space used to set the file view, so adjust the displacements + /* Calculate the displacement of the current point */ + hsize_t disp_tmp = H5VM_array_offset(space->extent.rank, space->extent.size, curr->pnt); + if(disp_tmp > LONG_MAX) /* Maximum value of type long */ + HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "disp overflow") + disp[u] = (MPI_Aint)disp_tmp; + disp[u] *= (MPI_Aint)elmt_size; + + /* This is a File Space used to set the file view, so adjust the displacements * to have them monotonically non-decreasing. - * Generate the permutation array by indicating at each point being selected, - * the position it will shifted in the new displacement. Example: - * Suppose 4 points with corresponding are selected - * Pt 1: disp=6 ; Pt 2: disp=3 ; Pt 3: disp=0 ; Pt 4: disp=4 + * Generate the permutation array by indicating at each point being selected, + * the position it will shifted in the new displacement. Example: + * Suppose 4 points with corresponding are selected + * Pt 1: disp=6 ; Pt 2: disp=3 ; Pt 3: disp=0 ; Pt 4: disp=4 * The permute map to sort the displacements in order will be: * point 1: map[0] = L, indicating that this point is not moved (1st point selected) - * point 2: map[1] = 0, indicating that this point is moved to the first position, + * point 2: map[1] = 0, indicating that this point is moved to the first position, * since disp_pt1(6) > disp_pt2(3) - * point 3: map[2] = 0, move to position 0, bec it has the lowest disp between + * point 3: map[2] = 0, move to position 0, bec it has the lowest disp between * the points selected so far. - * point 4: map[3] = 2, move the 2nd position since point 1 has a higher disp, + * point 4: map[3] = 2, move the 2nd position since point 1 has a higher disp, * but points 2 and 3 have lower displacements. */ if(do_permute) { if(u > 0 && disp[u] < disp[u - 1]) { - unsigned s = 0, l = u, m = u / 2; + hsize_t s = 0, l = u, m = u / 2; *is_permuted = TRUE; do { @@ -418,7 +446,7 @@ H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type HDmemmove(disp + m + 1, disp + m, (u - m) * sizeof(MPI_Aint)); disp[m] = temp; } /* end if */ - (*permute)[u] = m; + (*permute)[u] = m; } /* end if */ else (*permute)[u] = num_points; @@ -426,7 +454,7 @@ H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type /* this is a memory space, and no permutation is necessary to create the derived datatype */ else { - ;/* do nothing */ + ; /* do nothing */ } /* end else */ /* get the next point */ @@ -434,7 +462,7 @@ H5S_mpio_point_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type } /* end for */ /* Create the MPI datatype for the set of element displacements */ - if(H5S_mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0) + if(H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection") /* Set values about MPI datatype created */ @@ -452,11 +480,11 @@ done: } /* end if */ FUNC_LEAVE_NOAPI(ret_value) -} /* H5S_mpio_point_type() */ +} /* H5S__mpio_point_type() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_permute_type + * Function: H5S__mpio_permute_type * * Purpose: Translate an HDF5 "all/hyper/point" selection into an MPI type, * while applying the permutation map. This function is called if @@ -464,10 +492,10 @@ done: * selection and so the memory datatype has to be permuted using the * permutation map created by the file selection. * - * Note: This routine is called from H5S_mpio_space_type(), which is + * Note: This routine is called from H5_mpio_space_type(), which is * called first for the file dataspace and creates * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -479,7 +507,7 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, +H5S__mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) { MPI_Aint *disp = NULL; /* Datatype displacement for each point*/ @@ -491,7 +519,7 @@ H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, hsize_t u; /* Local index variable */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Check args */ HDassert(space); @@ -506,7 +534,7 @@ H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") /* Initialize selection iterator */ - if(H5S_select_iter_init(&sel_iter, space, elmt_size) < 0) + if(H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") sel_iter_init = TRUE; /* Selection iteration info has been initialized */ @@ -523,7 +551,7 @@ H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, size_t curr_seq; /* Current sequence being worked on */ /* Get the sequences of bytes */ - if(H5S_SELECT_GET_SEQ_LIST(space, 0, &sel_iter, (size_t)H5D_IO_VECTOR_SIZE, max_elem, &nseq, &nelem, off, len) < 0) + if(H5S_SELECT_ITER_GET_SEQ_LIST(&sel_iter, (size_t)H5D_IO_VECTOR_SIZE, max_elem, &nseq, &nelem, off, len) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_UNSUPPORTED, FAIL, "sequence length generation failed") /* Loop, while sequences left to process */ @@ -540,14 +568,16 @@ H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, /* Loop, while bytes left in sequence */ while(curr_len > 0) { /* Set the displacement of the current point */ - disp[u] = curr_off; + if(curr_off > LONG_MAX) + HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "curr_off overflow") + disp[u] = (MPI_Aint)curr_off; - /* This is a memory displacement, so for each point selected, + /* This is a memory displacement, so for each point selected, * apply the map that was generated by the file selection */ if((*permute)[u] != num_points) { MPI_Aint temp = disp[u]; - HDmemmove(disp + (*permute)[u] + 1, disp + (*permute)[u], + HDmemmove(disp + (*permute)[u] + 1, disp + (*permute)[u], (u - (*permute)[u]) * sizeof(MPI_Aint)); disp[(*permute)[u]] = temp; } /* end if */ @@ -568,7 +598,7 @@ H5S_mpio_permute_type(const H5S_t *space, size_t elmt_size, hsize_t **permute, } /* end while */ /* Create the MPI datatype for the set of element displacements */ - if(H5S_mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0) + if(H5S__mpio_create_point_datatype(elmt_size, num_points, disp, new_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create an MPI Datatype from point selection") /* Set values about MPI datatype created */ @@ -590,15 +620,15 @@ done: } /* end if */ FUNC_LEAVE_NOAPI(ret_value) -} /* H5S_mpio_permute_type() */ +} /* H5S__mpio_permute_type() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_hyper_type + * Function: H5S__mpio_reg_hyper_type * - * Purpose: Translate an HDF5 hyperslab selection into an MPI type. + * Purpose: Translate a regular HDF5 hyperslab selection into an MPI type. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -606,14 +636,11 @@ done: * *is_derived_type 0 if MPI primitive type, 1 if derived * * Programmer: rky 980813 - * Modifications: - * Mohamad Chaarawi - * Adding support for large datatypes (beyond the limit of a - * 32 bit integer. + * *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, +H5S__mpio_reg_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) { H5S_sel_iter_t sel_iter; /* Selection iteration info */ @@ -627,6 +654,7 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, hsize_t count; } d[H5S_MAX_RANK]; + hsize_t bigio_count; /* Transition point to create derived type */ hsize_t offset[H5S_MAX_RANK]; hsize_t max_xtent[H5S_MAX_RANK]; H5S_hyper_dim_t *diminfo; /* [rank] */ @@ -639,14 +667,15 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, int mpi_code; /* MPI return code */ herr_t ret_value = SUCCEED; - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Check args */ HDassert(space); HDassert(sizeof(MPI_Aint) >= sizeof(elmt_size)); + bigio_count = H5_mpi_get_bigio_count(); /* Initialize selection iterator */ - if(H5S_select_iter_init(&sel_iter, space, elmt_size) < 0) + if(H5S_select_iter_init(&sel_iter, space, elmt_size, 0) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator") sel_iter_init = TRUE; /* Selection iteration info has been initialized */ @@ -654,16 +683,15 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, diminfo = sel_iter.u.hyp.diminfo; HDassert(diminfo); - /* make a local copy of the dimension info so we can operate with them */ + /* Make a local copy of the dimension info so we can operate with them */ /* Check if this is a "flattened" regular hyperslab selection */ if(sel_iter.u.hyp.iter_rank != 0 && sel_iter.u.hyp.iter_rank < space->extent.rank) { /* Flattened selection */ rank = sel_iter.u.hyp.iter_rank; - HDassert(rank <= H5S_MAX_RANK); /* within array bounds */ #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "%s: Flattened selection\n",FUNC); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "%s: Flattened selection\n",FUNC); #endif for(u = 0; u < rank; ++u) { H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t) @@ -672,33 +700,30 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, d[u].block = diminfo[u].block; d[u].count = diminfo[u].count; d[u].xtent = sel_iter.u.hyp.size[u]; + #ifdef H5S_DEBUG - if(H5DEBUG(S)){ - HDfprintf(H5DEBUG(S), "%s: start=%Hd stride=%Hu count=%Hu block=%Hu xtent=%Hu", - FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent ); - if (u==0) - HDfprintf(H5DEBUG(S), " rank=%u\n", rank ); - else - HDfprintf(H5DEBUG(S), "\n" ); - } +if(H5DEBUG(S)) { + HDfprintf(H5DEBUG(S), "%s: start=%Hd stride=%Hu count=%Hu block=%Hu xtent=%Hu", + FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent); + if(u == 0) + HDfprintf(H5DEBUG(S), " rank=%u\n", rank); + else + HDfprintf(H5DEBUG(S), "\n"); +} #endif - if(0 == d[u].block) - goto empty; - if(0 == d[u].count) - goto empty; - if(0 == d[u].xtent) - goto empty; + + /* Sanity check */ + HDassert(d[u].block > 0); + HDassert(d[u].count > 0); + HDassert(d[u].xtent > 0); } /* end for */ } /* end if */ else { /* Non-flattened selection */ rank = space->extent.rank; - HDassert(rank <= H5S_MAX_RANK); /* within array bounds */ - if(0 == rank) - goto empty; #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S),"%s: Non-flattened selection\n",FUNC); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S),"%s: Non-flattened selection\n",FUNC); #endif for(u = 0; u < rank; ++u) { H5_CHECK_OVERFLOW(diminfo[u].start, hsize_t, hssize_t) @@ -707,22 +732,22 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, d[u].block = diminfo[u].block; d[u].count = diminfo[u].count; d[u].xtent = space->extent.size[u]; + #ifdef H5S_DEBUG - if(H5DEBUG(S)){ +if(H5DEBUG(S)) { HDfprintf(H5DEBUG(S), "%s: start=%Hd stride=%Hu count=%Hu block=%Hu xtent=%Hu", - FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent ); - if (u==0) - HDfprintf(H5DEBUG(S), " rank=%u\n", rank ); + FUNC, d[u].start, d[u].strid, d[u].count, d[u].block, d[u].xtent); + if(u == 0) + HDfprintf(H5DEBUG(S), " rank=%u\n", rank); else - HDfprintf(H5DEBUG(S), "\n" ); - } + HDfprintf(H5DEBUG(S), "\n"); +} #endif - if(0 == d[u].block) - goto empty; - if(0 == d[u].count) - goto empty; - if(0 == d[u].xtent) - goto empty; + + /* Sanity check */ + HDassert(d[u].block > 0); + HDassert(d[u].count > 0); + HDassert(d[u].xtent > 0); } /* end for */ } /* end else */ @@ -733,56 +758,52 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, offset[rank - 1] = 1; max_xtent[rank - 1] = d[rank - 1].xtent; #ifdef H5S_DEBUG - if(H5DEBUG(S)) { - i = ((int)rank) - 1; - HDfprintf(H5DEBUG(S), " offset[%2d]=%Hu; max_xtent[%2d]=%Hu\n", - i, offset[i], i, max_xtent[i]); - } +if(H5DEBUG(S)) { + i = ((int)rank) - 1; + HDfprintf(H5DEBUG(S), " offset[%2d]=%Hu; max_xtent[%2d]=%Hu\n", i, offset[i], i, max_xtent[i]); +} #endif for(i = ((int)rank) - 2; i >= 0; --i) { offset[i] = offset[i + 1] * d[i + 1].xtent; max_xtent[i] = max_xtent[i + 1] * d[i].xtent; #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), " offset[%2d]=%Hu; max_xtent[%2d]=%Hu\n", - i, offset[i], i, max_xtent[i]); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), " offset[%2d]=%Hu; max_xtent[%2d]=%Hu\n", i, offset[i], i, max_xtent[i]); #endif } /* end for */ /* Create a type covering the selected hyperslab. * Multidimensional dataspaces are stored in row-major order. * The type is built from the inside out, going from the - * fastest-changing (i.e., inner) dimension * to the slowest (outer). */ + * fastest-changing (i.e., inner) dimension * to the slowest (outer). + */ /******************************************************* * Construct contig type for inner contig dims: *******************************************************/ #ifdef H5S_DEBUG - if(H5DEBUG(S)) { +if(H5DEBUG(S)) { HDfprintf(H5DEBUG(S), "%s: Making contig type %Zu MPI_BYTEs\n", FUNC, elmt_size); for(i = ((int)rank) - 1; i >= 0; --i) HDfprintf(H5DEBUG(S), "d[%d].xtent=%Hu \n", i, d[i].xtent); - } +} #endif /* LARGE_DATATYPE:: - * Check if the number of elements to form the inner type fits into a 32 bit integer. + * Check if the number of elements to form the inner type fits into a 32 bit integer. * If yes then just create the innertype with MPI_Type_contiguous. * Otherwise create a compound datatype by iterating as many times as needed * for the innertype to be created. */ if(bigio_count >= elmt_size) { - /* Use a single MPI datatype that has a 32 bit size */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - else { - /* Create the compound datatype for this operation (> 2GB) */ - if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &inner_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't ccreate a large inner datatype in hyper selection") - } - } + /* Use a single MPI datatype that has a 32 bit size */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } /* end if */ + else + /* Create the compound datatype for this operation (> 2GB) */ + if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &inner_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large inner datatype in hyper selection") /******************************************************* * Construct the type by walking the hyperslab dims @@ -790,106 +811,109 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, *******************************************************/ for(i = ((int)rank) - 1; i >= 0; --i) { #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n" - "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n", - FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n" + "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n", + FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]); #endif #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i); #endif /**************************************** - * Build vector type of the selection. - ****************************************/ - if (bigio_count >= d[i].count && - bigio_count >= d[i].block && - bigio_count >= d[i].strid) { - - /* All the parameters fit into 32 bit integers so create the vector type normally */ - mpi_code = MPI_Type_vector((int)(d[i].count), /* count */ - (int)(d[i].block), /* blocklength */ - (int)(d[i].strid), /* stride */ - inner_type, /* old type */ - &outer_type); /* new type */ - - MPI_Type_free(&inner_type); - if(mpi_code != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code) - } + * Build vector type of the selection. + ****************************************/ + if(bigio_count >= d[i].count && + bigio_count >= d[i].block && bigio_count >= d[i].strid) { + /* All the parameters fit into 32 bit integers so create the vector type normally */ + mpi_code = MPI_Type_vector((int)(d[i].count), /* count */ + (int)(d[i].block), /* blocklength */ + (int)(d[i].strid), /* stride */ + inner_type, /* old type */ + &outer_type); /* new type */ + + MPI_Type_free(&inner_type); + if(mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code) + } /* end if */ else { - /* Things get a bit more complicated and require LARGE_DATATYPE processing - * There are two MPI datatypes that need to be created: - * 1) an internal contiguous block; and - * 2) a collection of elements where an element is a contiguous block(1). - * Remember that the input arguments to the MPI-IO functions use integer - * values to represent element counts. We ARE allowed however, in the - * more recent MPI implementations to use constructed datatypes whereby - * the total number of bytes in a transfer could be : - * (2GB-1)number_of_blocks * the_datatype_extent. - */ + /* Things get a bit more complicated and require LARGE_DATATYPE processing + * There are two MPI datatypes that need to be created: + * 1) an internal contiguous block; and + * 2) a collection of elements where an element is a contiguous block(1). + * Remember that the input arguments to the MPI-IO functions use integer + * values to represent element counts. We ARE allowed however, in the + * more recent MPI implementations to use constructed datatypes whereby + * the total number of bytes in a transfer could be : + * (2GB-1)number_of_blocks * the_datatype_extent. + */ MPI_Aint stride_in_bytes, inner_extent; MPI_Datatype block_type; - /* create a contiguous datatype inner_type x number of BLOCKS. - * Again we need to check that the number of BLOCKS can fit into + /* Create a contiguous datatype inner_type x number of BLOCKS. + * Again we need to check that the number of BLOCKS can fit into * a 32 bit integer */ - if (bigio_count < d[i].block) { - if (H5S_mpio_create_large_type(d[i].block, 0, inner_type, - &block_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't ccreate a large block datatype in hyper selection") - } - } - else { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)d[i].block, - inner_type, - &block_type))) + if(bigio_count < d[i].block) { + if(H5_mpio_create_large_type(d[i].block, 0, inner_type, &block_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large block datatype in hyper selection") + } /* end if */ + else + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)d[i].block, inner_type, &block_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - MPI_Type_extent (inner_type, &inner_extent); + /* As of version 4.0, OpenMPI now turns off MPI-1 API calls by default, + * so we're using the MPI-2 version even though we don't need the lb + * value. + */ + { + MPI_Aint unused_lb_arg; + MPI_Type_get_extent(inner_type, &unused_lb_arg, &inner_extent); + } stride_in_bytes = inner_extent * (MPI_Aint)d[i].strid; /* If the element count is larger than what a 32 bit integer can hold, * we call the large type creation function to handle that */ - if (bigio_count < d[i].count) { - if (H5S_mpio_create_large_type (d[i].count, stride_in_bytes, block_type, - &outer_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't create a large outer datatype in hyper selection") - } - } + if(bigio_count < d[i].count) { + if(H5_mpio_create_large_type(d[i].count, stride_in_bytes, block_type, &outer_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large outer datatype in hyper selection") + } /* end if */ /* otherwise a regular create_hvector will do */ - else { - mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */ - 1, /* blocklength */ - stride_in_bytes, /* stride in bytes*/ - block_type, /* old type */ - &outer_type); /* new type */ - if(MPI_SUCCESS != mpi_code) + else + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */ + 1, /* blocklength */ + stride_in_bytes, /* stride in bytes*/ + block_type, /* old type */ + &outer_type))) /* new type */ HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - } + MPI_Type_free(&block_type); MPI_Type_free(&inner_type); - } - /**************************************** - * Then build the dimension type as (start, vector type, xtent). - ****************************************/ - /* calculate start and extent values of this dimension */ - start_disp = d[i].start * offset[i] * elmt_size; - new_extent = (MPI_Aint)elmt_size * max_xtent[i]; + } /* end else */ + + /**************************************** + * Then build the dimension type as (start, vector type, xtent). + ****************************************/ + + /* Calculate start and extent values of this dimension */ + /* Check if value overflow to cast to type MPI_Aint */ + if(d[i].start > LONG_MAX || offset[i] > LONG_MAX || elmt_size > LONG_MAX) + HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "result overflow") + start_disp = (MPI_Aint)d[i].start * (MPI_Aint)offset[i] * (MPI_Aint)elmt_size; + + if(max_xtent[i] > LONG_MAX) + HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "max_xtent overflow") + new_extent = (MPI_Aint)elmt_size * (MPI_Aint)max_xtent[i]; if(MPI_SUCCESS != (mpi_code = MPI_Type_get_extent(outer_type, &lb, &extent_len))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_get_extent failed", mpi_code) /************************************************* - * Restructure this datatype ("outer_type") - * so that it still starts at 0, but its extent - * is the full extent in this dimension. - *************************************************/ + * Restructure this datatype ("outer_type") + * so that it still starts at 0, but its extent + * is the full extent in this dimension. + *************************************************/ if(start_disp > 0 || extent_len < new_extent) { MPI_Datatype interm_type; int block_len = 1; @@ -909,10 +933,9 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, else inner_type = outer_type; } /* end for */ -/*************************** -* End of loop, walking -* thru dimensions. -***************************/ +/****************************************** +* End of loop, walking through dimensions. +*******************************************/ /* At this point inner_type is actually the outermost type, even for 0-trip loop */ *new_type = inner_type; @@ -922,13 +945,6 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, /* fill in the remaining return values */ *count = 1; /* only have to move one of these suckers! */ *is_derived_type = TRUE; - HGOTO_DONE(SUCCEED); - -empty: - /* special case: empty hyperslab */ - *new_type = MPI_BYTE; - *count = 0; - *is_derived_type = FALSE; done: /* Release selection iterator */ @@ -937,21 +953,20 @@ done: HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator") #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "Leave %s, count=%ld is_derived_type=%t\n", - FUNC, *count, *is_derived_type ); +if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "Leave %s, count=%ld is_derived_type=%t\n", FUNC, *count, *is_derived_type); #endif FUNC_LEAVE_NOAPI(ret_value) -} /* end H5S_mpio_hyper_type() */ +} /* end H5S__mpio_reg_hyper_type() */ /*------------------------------------------------------------------------- - * Function: H5S_mpio_span_hyper_type + * Function: H5S__mpio_span_hyper_type * * Purpose: Translate an HDF5 irregular hyperslab selection into an MPI type. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -960,24 +975,23 @@ done: * * Programmer: kyang * - * Modifications: - * Mohamad Chaarawi - * Adding support for large datatypes (beyond the limit of a - * 32 bit integer. *------------------------------------------------------------------------- */ static herr_t -H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, +H5S__mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type) { + H5S_mpio_mpitype_list_t type_list; /* List to track MPI data types created */ MPI_Datatype elmt_type; /* MPI datatype for an element */ hbool_t elmt_type_is_derived = FALSE; /* Whether the element type has been created */ MPI_Datatype span_type; /* MPI datatype for overall span tree */ + hsize_t bigio_count; /* Transition point to create derived type */ hsize_t down[H5S_MAX_RANK]; /* 'down' sizes for each dimension */ + uint64_t op_gen; /* Operation generation value */ int mpi_code; /* MPI return code */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Check args */ HDassert(space); @@ -985,30 +999,38 @@ H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, HDassert(space->select.sel_info.hslab->span_lst); HDassert(space->select.sel_info.hslab->span_lst->head); + bigio_count = H5_mpi_get_bigio_count(); /* Create the base type for an element */ - if (bigio_count >= elmt_size) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - } - else { - if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &elmt_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't create a large element datatype in span_hyper selection") - } - } + if(bigio_count >= elmt_size) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } /* end if */ + else + if(H5_mpio_create_large_type(elmt_size, 0, MPI_BYTE, &elmt_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection") elmt_type_is_derived = TRUE; /* Compute 'down' sizes for each dimension */ if(H5VM_array_down(space->extent.rank, space->extent.size, down) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGETSIZE, FAIL, "couldn't compute 'down' dimension sizes") - /* Obtain derived data type */ - if(H5S_obtain_datatype(down, space->select.sel_info.hslab->span_lst->head, &elmt_type, &span_type, elmt_size) < 0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type") - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(&span_type))) + /* Acquire an operation generation value for creating MPI datatypes */ + op_gen = H5S__hyper_get_op_gen(); + + /* Obtain derived MPI data type */ + /* Always use op_info[0] since we own this op_info, so there can be no + * simultaneous operations */ + type_list.head = type_list.tail = NULL; + if(H5S__obtain_datatype(space->select.sel_info.hslab->span_lst, down, elmt_size, &elmt_type, &span_type, &type_list, 0, op_gen) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type") + if(MPI_SUCCESS != (mpi_code = MPI_Type_dup(span_type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - *new_type = span_type; + + /* Release MPI data types generated during span tree traversal */ + if(H5S__release_datatype(&type_list) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "couldn't release MPI derived data type") /* fill in the remaining return values */ *count = 1; @@ -1021,16 +1043,62 @@ done: HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) FUNC_LEAVE_NOAPI(ret_value) -} /* end H5S_mpio_span_hyper_type() */ +} /* end H5S__mpio_span_hyper_type() */ /*------------------------------------------------------------------------- - * Function: H5S_obtain_datatype + * Function: H5S__release_datatype * - * Purpose: Obtain an MPI derived datatype based on span-tree - * implementation + * Purpose: Release the MPI derived datatypes for span-tree hyperslab selection * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. + * + * Programmer: Quincey Koziol, February 2, 2019 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5S__release_datatype(H5S_mpio_mpitype_list_t *type_list) +{ + H5S_mpio_mpitype_node_t *curr; /* Pointer to head of list */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_STATIC + + /* Sanity check */ + HDassert(type_list); + + /* Iterate over the list, freeing the MPI data types */ + curr = type_list->head; + while(curr) { + H5S_mpio_mpitype_node_t *next; /* Pointer to next node in list */ + int mpi_code; /* MPI return status code */ + + /* Release the MPI data type for this span tree */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&curr->type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + + /* Get pointer to next node in list */ + next = curr->next; + + /* Free the current node */ + curr = H5FL_FREE(H5S_mpio_mpitype_node_t, curr); + + /* Advance to next node */ + curr = next; + } /* end while */ + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5S__release_datatype() */ + + +/*------------------------------------------------------------------------- + * Function: H5S__obtain_datatype + * + * Purpose: Obtain an MPI derived datatype for span-tree hyperslab selection + * + * Return: Non-negative on success, negative on failure. * * Outputs: *span_type the MPI type corresponding to the selection * @@ -1039,208 +1107,222 @@ done: *------------------------------------------------------------------------- */ static herr_t -H5S_obtain_datatype(const hsize_t *down, H5S_hyper_span_t *span, - const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size) +H5S__obtain_datatype(H5S_hyper_span_info_t *spans, const hsize_t *down, + size_t elmt_size, const MPI_Datatype *elmt_type, MPI_Datatype *span_type, + H5S_mpio_mpitype_list_t *type_list, unsigned op_info_i, uint64_t op_gen) { + H5S_hyper_span_t *span; /* Hyperslab span to iterate with */ + hsize_t bigio_count; /* Transition point to create derived type */ + size_t alloc_count = 0; /* Number of span tree nodes allocated at this level */ size_t outercount = 0; /* Number of span tree nodes at this level */ MPI_Datatype *inner_type = NULL; hbool_t inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */ - hbool_t span_type_valid = FALSE; /* Whether the span_type MPI datatypes is valid */ - hbool_t large_block = FALSE; /* Wether the block length is larger than 32 bit integer */ int *blocklen = NULL; MPI_Aint *disp = NULL; - H5S_hyper_span_t *tspan = NULL; /* Temporary pointer to span tree node */ + size_t u; /* Local index variable */ int mpi_code; /* MPI return status code */ herr_t ret_value = SUCCEED; /* Return value */ - FUNC_ENTER_NOAPI_NOINIT + FUNC_ENTER_STATIC /* Sanity check */ - HDassert(span); + HDassert(spans); + HDassert(type_list); + + bigio_count = H5_mpi_get_bigio_count(); + /* Check if we've visited this span tree before */ + if(spans->op_info[op_info_i].op_gen != op_gen) { + H5S_mpio_mpitype_node_t *type_node; /* Pointer to new node in MPI data type list */ + + /* Allocate the initial displacement & block length buffers */ + alloc_count = H5S_MPIO_INITIAL_ALLOC_COUNT; + if(NULL == (disp = (MPI_Aint *)H5MM_malloc(alloc_count * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") + if(NULL == (blocklen = (int *)H5MM_malloc(alloc_count * sizeof(int)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") + + /* If this is the fastest changing dimension, it is the base case for derived datatype. */ + span = spans->head; + if(NULL == span->down) { + hbool_t large_block = FALSE; /* Wether the block length is larger than 32 bit integer */ + + outercount = 0; + while(span) { + hsize_t nelmts; /* # of elements covered by current span */ + + /* Check if we need to increase the size of the buffers */ + if(outercount >= alloc_count) { + MPI_Aint *tmp_disp; /* Temporary pointer to new displacement buffer */ + int *tmp_blocklen; /* Temporary pointer to new block length buffer */ + + /* Double the allocation count */ + alloc_count *= 2; + + /* Re-allocate the buffers */ + if(NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") + disp = tmp_disp; + if(NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") + blocklen = tmp_blocklen; + } /* end if */ - /* Allocate the initial displacement & block length buffers */ - alloc_count = H5S_MPIO_INITIAL_ALLOC_COUNT; - if(NULL == (disp = (MPI_Aint *)H5MM_malloc(alloc_count * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") - if(NULL == (blocklen = (int *)H5MM_malloc(alloc_count * sizeof(int)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") - - /* if this is the fastest changing dimension, it is the base case for derived datatype. */ - if(NULL == span->down) { - tspan = span; - outercount = 0; - while(tspan) { - /* Check if we need to increase the size of the buffers */ - if(outercount >= alloc_count) { - MPI_Aint *tmp_disp; /* Temporary pointer to new displacement buffer */ - int *tmp_blocklen; /* Temporary pointer to new block length buffer */ - - /* Double the allocation count */ - alloc_count *= 2; - - /* Re-allocate the buffers */ - if(NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") - disp = tmp_disp; - if(NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") - blocklen = tmp_blocklen; + /* Compute the number of elements to attempt in this span */ + nelmts = (span->high - span->low) + 1; + + /* Store displacement & block length */ + disp[outercount] = (MPI_Aint)elmt_size * (MPI_Aint)span->low; + H5_CHECK_OVERFLOW(nelmts, hsize_t, int) + blocklen[outercount] = (int)nelmts; + + if(bigio_count < (hsize_t)blocklen[outercount]) + large_block = TRUE; /* at least one block type is large, so set this flag to true */ + + span = span->next; + outercount++; + } /* end while */ + + /* Everything fits into integers, so cast them and use hindexed */ + if(bigio_count >= outercount && large_block == FALSE) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, &spans->op_info[op_info_i].u.down_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) } /* end if */ + else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */ + for(u = 0 ; u < outercount; u++) { + MPI_Datatype temp_type = MPI_DATATYPE_NULL; + + /* create the block type from elmt_type while checking the 32 bit int limit */ + if((hsize_t)(blocklen[u]) > bigio_count) { + if(H5_mpio_create_large_type((hsize_t)blocklen[u], 0, *elmt_type, &temp_type) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't create a large element datatype in span_hyper selection") + } /* end if */ + else + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)blocklen[u], *elmt_type, &temp_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + + /* Combine the current datatype that is created with this current block type */ + if(0 == u) /* first iteration, there is no combined datatype yet */ + spans->op_info[op_info_i].u.down_type = temp_type; + else { + int bl[2] = {1, 1}; + MPI_Aint ds[2] = {disp[u - 1], disp[u]}; + MPI_Datatype dt[2] = {spans->op_info[op_info_i].u.down_type, temp_type}; + + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, /* count */ + bl, /* blocklength */ + ds, /* stride in bytes*/ + dt, /* old type */ + &spans->op_info[op_info_i].u.down_type))) /* new type */ + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + + /* Release previous temporary datatype */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&temp_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + } /* end else */ + } /* end for */ + } /* end else (LARGE_DATATYPE::) */ + } /* end if */ + else { + MPI_Aint stride; /* Distance between inner MPI datatypes */ - /* Store displacement & block length */ - disp[outercount] = (MPI_Aint)elmt_size * tspan->low; - H5_CHECK_OVERFLOW(tspan->nelem, hsize_t, int) - blocklen[outercount] = (int)tspan->nelem; - tspan = tspan->next; + if(NULL == (inner_type = (MPI_Datatype *)H5MM_malloc(alloc_count * sizeof(MPI_Datatype)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes") - if (bigio_count < blocklen[outercount]) { - large_block = TRUE; /* at least one block type is large, so set this flag to true */ - } + /* Calculate the total bytes of the lower dimension */ + stride = (MPI_Aint)(*down) * (MPI_Aint)elmt_size; + + /* Loop over span nodes */ + outercount = 0; + while(span) { + MPI_Datatype down_type; /* Temporary MPI datatype for a span tree node's children */ + hsize_t nelmts; /* # of elements covered by current span */ + + /* Check if we need to increase the size of the buffers */ + if(outercount >= alloc_count) { + MPI_Aint *tmp_disp; /* Temporary pointer to new displacement buffer */ + int *tmp_blocklen; /* Temporary pointer to new block length buffer */ + MPI_Datatype *tmp_inner_type; /* Temporary pointer to inner MPI datatype buffer */ + + /* Double the allocation count */ + alloc_count *= 2; + + /* Re-allocate the buffers */ + if(NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") + disp = tmp_disp; + if(NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") + blocklen = tmp_blocklen; + if(NULL == (tmp_inner_type = (MPI_Datatype *)H5MM_realloc(inner_type, alloc_count * sizeof(MPI_Datatype)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes") + inner_type = tmp_inner_type; + } /* end if */ - outercount++; - } /* end while */ - - /* Everything fits into integers, so cast them and use hindexed */ - if (bigio_count >= outercount && large_block == FALSE) { - - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, span_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - span_type_valid = TRUE; - } - else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */ - size_t i; - - for (i=0 ; i<outercount ; i++) { - MPI_Datatype temp_type = MPI_DATATYPE_NULL, outer_type = MPI_DATATYPE_NULL; - /* create the block type from elmt_type while checking the 32 bit int limit */ - if (blocklen[i] > bigio_count) { - if (H5S_mpio_create_large_type (blocklen[i], 0, *elmt_type, &temp_type) < 0) { - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, - "couldn't create a large element datatype in span_hyper selection") - } - } - else { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)blocklen[i], - *elmt_type, - &temp_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - - /* combine the current datatype that is created with this current block type */ - if (0 == i) { /* first iteration, there is no combined datatype yet */ - *span_type = temp_type; - } - else { - int bl[2] = {1,1}; - MPI_Aint ds[2] = {disp[i-1],disp[i]}; - MPI_Datatype dt[2] = {*span_type, temp_type}; - - if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct (2, /* count */ - bl, /* blocklength */ - ds, /* stride in bytes*/ - dt, /* old type */ - &outer_type))){ /* new type */ - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) - } - *span_type = outer_type; - } - - if (outer_type != MPI_DATATYPE_NULL) - MPI_Type_free(&outer_type); - /* temp_type shouldn't be freed here... - * Note that we have simply copied it above (not MPI_Type_dup) - * into the 'span_type' argument of the caller. - * The caller needs to deal with it there! - */ - } - } /* end (LARGE_DATATYPE::) */ + /* Displacement should be in byte and should have dimension information */ + /* First using MPI Type vector to build derived data type for this span only */ + /* Need to calculate the disp in byte for this dimension. */ + disp[outercount] = (MPI_Aint)span->low * stride; + blocklen[outercount] = 1; - } /* end if */ - else { - size_t u; /* Local index variable */ + /* Generate MPI datatype for next dimension down */ + if(H5S__obtain_datatype(span->down, down + 1, elmt_size, elmt_type, &down_type, type_list, op_info_i, op_gen) < 0) + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type") - if(NULL == (inner_type = (MPI_Datatype *)H5MM_malloc(alloc_count * sizeof(MPI_Datatype)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes") + /* Compute the number of elements to attempt in this span */ + nelmts = (span->high - span->low) + 1; - tspan = span; - outercount = 0; - while(tspan) { - MPI_Datatype down_type; /* Temporary MPI datatype for a span tree node's children */ - MPI_Aint stride; /* Distance between inner MPI datatypes */ + /* Build the MPI datatype for this node */ + H5_CHECK_OVERFLOW(nelmts, hsize_t, int) + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)nelmts, 1, stride, down_type, &inner_type[outercount]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - /* Check if we need to increase the size of the buffers */ - if(outercount >= alloc_count) { - MPI_Aint *tmp_disp; /* Temporary pointer to new displacement buffer */ - int *tmp_blocklen; /* Temporary pointer to new block length buffer */ - MPI_Datatype *tmp_inner_type; /* Temporary pointer to inner MPI datatype buffer */ - - /* Double the allocation count */ - alloc_count *= 2; - - /* Re-allocate the buffers */ - if(NULL == (tmp_disp = (MPI_Aint *)H5MM_realloc(disp, alloc_count * sizeof(MPI_Aint)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of displacements") - disp = tmp_disp; - if(NULL == (tmp_blocklen = (int *)H5MM_realloc(blocklen, alloc_count * sizeof(int)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of block lengths") - blocklen = tmp_blocklen; - if(NULL == (tmp_inner_type = (MPI_Datatype *)H5MM_realloc(inner_type, alloc_count * sizeof(MPI_Datatype)))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of inner MPI datatypes") - inner_type = tmp_inner_type; - } /* end if */ + span = span->next; + outercount++; + } /* end while */ - /* Displacement should be in byte and should have dimension information */ - /* First using MPI Type vector to build derived data type for this span only */ - /* Need to calculate the disp in byte for this dimension. */ - /* Calculate the total bytes of the lower dimension */ - disp[outercount] = tspan->low * (*down) * elmt_size; - blocklen[outercount] = 1; - - /* Generate MPI datatype for next dimension down */ - if(H5S_obtain_datatype(down + 1, tspan->down->head, elmt_type, &down_type, elmt_size) < 0) - HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't obtain MPI derived data type") - - /* Build the MPI datatype for this node */ - stride = (*down) * elmt_size; - H5_CHECK_OVERFLOW(tspan->nelem, hsize_t, int) - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector((int)tspan->nelem, 1, stride, down_type, &inner_type[outercount]))) { - MPI_Type_free(&down_type); - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - } /* end if */ + /* Building the whole vector datatype */ + H5_CHECK_OVERFLOW(outercount, size_t, int) + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)outercount, blocklen, disp, inner_type, &spans->op_info[op_info_i].u.down_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + + /* Release inner node types */ + for(u = 0; u < outercount; u++) + if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u]))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) + inner_types_freed = TRUE; + } /* end else */ + + /* Allocate space for the MPI data type list node */ + if(NULL == (type_node = H5FL_MALLOC(H5S_mpio_mpitype_node_t))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate MPI data type list node") + + /* Set up MPI type node */ + type_node->type = spans->op_info[op_info_i].u.down_type; + type_node->next = NULL; + + /* Add MPI type node to list */ + if(type_list->head == NULL) + type_list->head = type_list->tail = type_node; + else { + type_list->tail->next = type_node; + type_list->tail = type_node; + } /* end else */ - /* Release MPI datatype for next dimension down */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&down_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - - tspan = tspan->next; - outercount++; - } /* end while */ - - /* building the whole vector datatype */ - H5_CHECK_OVERFLOW(outercount, size_t, int) - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct((int)outercount, blocklen, disp, inner_type, span_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) - span_type_valid = TRUE; - - /* Release inner node types */ - for(u = 0; u < outercount; u++) - if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u]))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - inner_types_freed = TRUE; + /* Remember that we've visited this span tree */ + spans->op_info[op_info_i].op_gen = op_gen; } /* end else */ + /* Return MPI data type for span tree */ + *span_type = spans->op_info[op_info_i].u.down_type; + done: /* General cleanup */ if(inner_type != NULL) { - if(!inner_types_freed) { - size_t u; /* Local index variable */ - + if(!inner_types_freed) for(u = 0; u < outercount; u++) if(MPI_SUCCESS != (mpi_code = MPI_Type_free(&inner_type[u]))) HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - } /* end if */ - H5MM_free(inner_type); } /* end if */ if(blocklen != NULL) @@ -1248,15 +1330,8 @@ done: if(disp != NULL) H5MM_free(disp); - /* Error cleanup */ - if(ret_value < 0) { - if(span_type_valid) - if(MPI_SUCCESS != (mpi_code = MPI_Type_free(span_type))) - HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code) - } /* end if */ - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5S_obtain_datatype() */ +} /* end H5S__obtain_datatype() */ /*------------------------------------------------------------------------- @@ -1265,7 +1340,7 @@ done: * Purpose: Translate an HDF5 dataspace selection into an MPI type. * Currently handle only hyperslab and "all" selections. * - * Return: non-negative on success, negative on failure. + * Return: Non-negative on success, negative on failure. * * Outputs: *new_type the MPI type corresponding to the selection * *count how many objects of the new_type in selection @@ -1277,7 +1352,7 @@ done: *------------------------------------------------------------------------- */ herr_t -H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, +H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type, hbool_t do_permute, hsize_t **permute_map, hbool_t *is_permuted) { @@ -1298,10 +1373,10 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type * out-of-order point selection, then permute this selection which * should be a memory selection to match the file space permutation. */ - if(TRUE == *is_permuted) { + if(TRUE == *is_permuted) { switch(H5S_GET_SELECT_TYPE(space)) { case H5S_SEL_NONE: - if(H5S_mpio_none_type(new_type, count, is_derived_type) < 0) + if(H5S__mpio_none_type(new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't convert 'none' selection to MPI type") break; @@ -1311,7 +1386,7 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type /* Sanity check */ HDassert(!do_permute); - if(H5S_mpio_permute_type(space, elmt_size, permute_map, new_type, count, is_derived_type) < 0) + if(H5S__mpio_permute_type(space, elmt_size, permute_map, new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't convert 'all' selection to MPI type") break; @@ -1326,29 +1401,28 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type else { switch(H5S_GET_SELECT_TYPE(space)) { case H5S_SEL_NONE: - if(H5S_mpio_none_type(new_type, count, is_derived_type) < 0) + if(H5S__mpio_none_type(new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert 'none' selection to MPI type") break; case H5S_SEL_ALL: - if(H5S_mpio_all_type(space, elmt_size, new_type, count, is_derived_type) < 0) + if(H5S__mpio_all_type(space, elmt_size, new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert 'all' selection to MPI type") break; case H5S_SEL_POINTS: - if(H5S_mpio_point_type(space, elmt_size, new_type, count, is_derived_type, do_permute, permute_map, is_permuted) < 0) + if(H5S__mpio_point_type(space, elmt_size, new_type, count, is_derived_type, do_permute, permute_map, is_permuted) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, "couldn't convert 'point' selection to MPI type") break; case H5S_SEL_HYPERSLABS: if((H5S_SELECT_IS_REGULAR(space) == TRUE)) { - if(H5S_mpio_hyper_type(space, elmt_size, new_type, count, is_derived_type) < 0) + if(H5S__mpio_reg_hyper_type(space, elmt_size, new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert regular 'hyperslab' selection to MPI type") } /* end if */ - else { - if(H5S_mpio_span_hyper_type(space, elmt_size, new_type, count, is_derived_type) < 0) + else + if(H5S__mpio_span_hyper_type(space, elmt_size, new_type, count, is_derived_type) < 0) HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert irregular 'hyperslab' selection to MPI type") - } /* end else */ break; case H5S_SEL_ERROR: @@ -1362,7 +1436,7 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type case H5S_NO_CLASS: default: - HDassert("unknown data space type" && 0); + HDassert("unknown dataspace type" && 0); break; } /* end switch */ @@ -1370,138 +1444,5 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5S_mpio_space_type() */ - -/*------------------------------------------------------------------------- - * Function: H5S_mpio_create_large_type - * - * Purpose: Create a large datatype of size larger than what a 32 bit integer - * can hold. - * - * Return: non-negative on success, negative on failure. - * - * *new_type the new datatype created - * - * Programmer: Mohamad Chaarawi - * - *------------------------------------------------------------------------- - */ -static herr_t H5S_mpio_create_large_type (hsize_t num_elements, - MPI_Aint stride_bytes, - MPI_Datatype old_type, - MPI_Datatype *new_type) -{ - int num_big_types; /* num times the 2G datatype will be repeated */ - int remaining_bytes; /* the number of bytes left that can be held in an int value */ - hsize_t leftover; - int block_len[2]; - int mpi_code; /* MPI return code */ - MPI_Datatype inner_type, outer_type, leftover_type, type[2]; - MPI_Aint disp[2], old_extent; - herr_t ret_value = SUCCEED; /* Return value */ - - FUNC_ENTER_NOAPI_NOINIT - - /* Calculate how many Big MPI datatypes are needed to represent the buffer */ - num_big_types = (int)(num_elements/bigio_count); - leftover = num_elements - num_big_types * (hsize_t)bigio_count; - H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t); - - /* Create a contiguous datatype of size equal to the largest - * number that a 32 bit integer can hold x size of old type. - * If the displacement is 0, then the type is contiguous, otherwise - * use type_hvector to create the type with the displacement provided - */ - if (0 == stride_bytes) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, - old_type, - &inner_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - } - else { - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (bigio_count, - 1, - stride_bytes, - old_type, - &inner_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - } - } - - /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part) - * If a stride is present, use hvector type - */ - if (0 == stride_bytes) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, - inner_type, - &outer_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - } - else { - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (num_big_types, - 1, - stride_bytes, - inner_type, - &outer_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - } - } - - MPI_Type_free(&inner_type); - - /* If there is a remaining part create a contiguous/vector datatype and then - * use a struct datatype to encapsulate everything. - */ - if(remaining_bytes) { - if (stride_bytes == 0) { - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous (remaining_bytes, - old_type, - &leftover_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) - } - } - else { - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector - ((int)(num_elements - (hsize_t)num_big_types*bigio_count), - 1, - stride_bytes, - old_type, - &leftover_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) - } - } - - MPI_Type_extent (old_type, &old_extent); - - /* Set up the arguments for MPI_Type_struct constructor */ - type[0] = outer_type; - type[1] = leftover_type; - block_len[0] = 1; - block_len[1] = 1; - disp[0] = 0; - disp[1] = (old_extent+stride_bytes)*num_big_types*(MPI_Aint)bigio_count; - - if(MPI_SUCCESS != (mpi_code = - MPI_Type_create_struct(2, block_len, disp, type, new_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) - } - - MPI_Type_free(&outer_type); - MPI_Type_free(&leftover_type); - } - else { - /* There are no remaining bytes so just set the new type to - * the outer type created */ - *new_type = outer_type; - } - - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) - -done: - FUNC_LEAVE_NOAPI(ret_value) -} /* end H5S_mpio_create_large_type */ - #endif /* H5_HAVE_PARALLEL */ |