From 47a4cd816cff5e1223f66614e150777f9dca7af0 Mon Sep 17 00:00:00 2001 From: Richard Warren Date: Wed, 5 Jul 2017 15:53:19 -0400 Subject: Commit changes needed for pull request --- testpar/t_bigio.c | 2097 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2097 insertions(+) create mode 100644 testpar/t_bigio.c diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c new file mode 100644 index 0000000..2f80e45 --- /dev/null +++ b/testpar/t_bigio.c @@ -0,0 +1,2097 @@ + +#include "hdf5.h" +#include "testphdf5.h" +#include "H5Dprivate.h" /* For Chunk tests */ + +// int TestVerbosity = VERBO_LO; /* Default Verbosity is Low */ + +/* Constants definitions */ +#define MAX_ERR_REPORT 10 /* Maximum number of errors reported */ + +/* Define some handy debugging shorthands, routines, ... */ +/* debugging tools */ + +#define MAINPROCESS (!mpi_rank) /* define process 0 as main process */ + +/* Constants definitions */ +#define RANK 2 + +#define IN_ORDER 1 +#define OUT_OF_ORDER 2 + +#define DATASET1 "DSET1" +#define DATASET2 "DSET2" +#define DATASET3 "DSET3" +#define DATASET4 "DSET4" +#define DATASET5 "DSET5" +#define DXFER_COLLECTIVE_IO 0x1 /* Collective IO*/ +#define DXFER_INDEPENDENT_IO 0x2 /* Independent IO collectively */ +#define DXFER_BIGCOUNT 536870916 + +#define HYPER 1 +#define POINT 2 +#define ALL 3 + +/* Dataset data type. Int's can be easily octo dumped. */ +typedef hsize_t B_DATATYPE; + +int facc_type = FACC_MPIO; /*Test file access type */ +int dxfer_coll_type = DXFER_COLLECTIVE_IO; +size_t bigcount = DXFER_BIGCOUNT; +char filename[20] = "bigio_test.h5"; +int nerrors = 0; +int mpi_size, mpi_rank; + +hsize_t space_dim1 = SPACE_DIM1 * 256; // 4096 +hsize_t space_dim2 = SPACE_DIM2; + +static void coll_chunktest(const char* filename, int chunk_factor, int select_factor, + int api_option, int file_selection, int mem_selection, int mode); +hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type); + +/* + * Setup the coordinates for point selection. + */ +static void +set_coords(hsize_t start[], + hsize_t count[], + hsize_t stride[], + hsize_t block[], + size_t num_points, + hsize_t coords[], + int order) +{ + hsize_t i,j, k = 0, m ,n, s1 ,s2; + + if(OUT_OF_ORDER == order) + k = (num_points * RANK) - 1; + else if(IN_ORDER == order) + k = 0; + + s1 = start[0]; + s2 = start[1]; + + for(i = 0 ; i < count[0]; i++) + for(j = 0 ; j < count[1]; j++) + for(m = 0 ; m < block[0]; m++) + for(n = 0 ; n < block[1]; n++) + if(OUT_OF_ORDER == order) { + coords[k--] = s2 + (stride[1] * j) + n; + coords[k--] = s1 + (stride[0] * i) + m; + } + else if(IN_ORDER == order) { + coords[k++] = s1 + stride[0] * i + m; + coords[k++] = s2 + stride[1] * j + n; + } +} + +/* + * Fill the dataset with trivial data for testing. + * Assume dimension rank is 2 and data is stored contiguous. + */ +static void +fill_datasets(hsize_t start[], hsize_t block[], B_DATATYPE * dataset) +{ + B_DATATYPE *dataptr = dataset; + hsize_t i, j; + + /* put some trivial data in the data_array */ + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + *dataptr = (B_DATATYPE)((i+start[0])*100 + (j+start[1]+1)); + dataptr++; + } + } +} + +/* + * Setup the coordinates for point selection. 
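+ * Note: this mirrors the logic of the static set_coords() helper above, but
+ * additionally dumps the generated coordinate list when VERBOSE_MED is set.
+ * Worked example (illustration only): start={0,0}, count={1,1}, stride={1,1},
+ * block={2,2} with IN_ORDER yields coords = {0,0, 0,1, 1,0, 1,1}, while
+ * OUT_OF_ORDER emits the same (row, col) pairs in reverse traversal order.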
+ */ +void point_set(hsize_t start[], + hsize_t count[], + hsize_t stride[], + hsize_t block[], + size_t num_points, + hsize_t coords[], + int order) +{ + hsize_t i,j, k = 0, m ,n, s1 ,s2; + + HDcompile_assert(RANK == 2); + + if(OUT_OF_ORDER == order) + k = (num_points * RANK) - 1; + else if(IN_ORDER == order) + k = 0; + + s1 = start[0]; + s2 = start[1]; + + for(i = 0 ; i < count[0]; i++) + for(j = 0 ; j < count[1]; j++) + for(m = 0 ; m < block[0]; m++) + for(n = 0 ; n < block[1]; n++) + if(OUT_OF_ORDER == order) { + coords[k--] = s2 + (stride[1] * j) + n; + coords[k--] = s1 + (stride[0] * i) + m; + } + else if(IN_ORDER == order) { + coords[k++] = s1 + stride[0] * i + m; + coords[k++] = s2 + stride[1] * j + n; + } + + if(VERBOSE_MED) { + printf("start[]=(%lu, %lu), count[]=(%lu, %lu), stride[]=(%lu, %lu), block[]=(%lu, %lu), total datapoints=%lu\n", + (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1], + (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1], + (unsigned long)(block[0] * block[1] * count[0] * count[1])); + k = 0; + for(i = 0; i < num_points ; i++) { + printf("(%d, %d)\n", (int)coords[k], (int)coords[k + 1]); + k += 2; + } + } +} + +/* + * Print the content of the dataset. + */ +static void +dataset_print(hsize_t start[], hsize_t block[], B_DATATYPE * dataset) +{ + B_DATATYPE *dataptr = dataset; + hsize_t i, j; + + /* print the column heading */ + printf("%-8s", "Cols:"); + for (j=0; j < block[1]; j++){ + printf("%3lu ", (unsigned long)(start[1]+j)); + } + printf("\n"); + + /* print the slab data */ + for (i=0; i < block[0]; i++){ + printf("Row %2lu: ", (unsigned long)(i+start[0])); + for (j=0; j < block[1]; j++){ + printf("%llu ", *dataptr++); + } + printf("\n"); + } +} + + +/* + * Print the content of the dataset. + */ +static int +verify_data(hsize_t start[], hsize_t count[], hsize_t stride[], hsize_t block[], B_DATATYPE *dataset, B_DATATYPE *original) +{ + hsize_t i, j; + int vrfyerrs; + + /* print it if VERBOSE_MED */ + if(VERBOSE_MED) { + printf("verify_data dumping:::\n"); + printf("start(%lu, %lu), count(%lu, %lu), stride(%lu, %lu), block(%lu, %lu)\n", + (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1], + (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1]); + printf("original values:\n"); + dataset_print(start, block, original); + printf("compared values:\n"); + dataset_print(start, block, dataset); + } + + vrfyerrs = 0; + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(*dataset != *original){ + if(vrfyerrs++ < MAX_ERR_REPORT || VERBOSE_MED){ + printf("Dataset Verify failed at [%lu][%lu](row %lu, col %lu): expect %llu, got %llu\n", + (unsigned long)i, (unsigned long)j, + (unsigned long)(i+start[0]), (unsigned long)(j+start[1]), + *(original), *(dataset)); + } + dataset++; + original++; + } + } + } + if(vrfyerrs > MAX_ERR_REPORT && !VERBOSE_MED) + printf("[more errors ...]\n"); + if(vrfyerrs) + printf("%d errors found in verify_data\n", vrfyerrs); + return(vrfyerrs); +} + +/* Set up the selection */ +static void +ccslab_set(int mpi_rank, + int mpi_size, + hsize_t start[], + hsize_t count[], + hsize_t stride[], + hsize_t block[], + int mode) +{ + + switch (mode){ + + case BYROW_CONT: + /* Each process takes a slabs of rows. 
*/ + block[0] = 1; + block[1] = 1; + stride[0] = 1; + stride[1] = 1; + count[0] = space_dim1; + count[1] = space_dim2; + start[0] = mpi_rank*count[0]; + start[1] = 0; + + break; + + case BYROW_DISCONT: + /* Each process takes several disjoint blocks. */ + block[0] = 1; + block[1] = 1; + stride[0] = 3; + stride[1] = 3; + count[0] = space_dim1/(stride[0]*block[0]); + count[1] = (space_dim2)/(stride[1]*block[1]); + start[0] = space_dim1*mpi_rank; + start[1] = 0; + + break; + + case BYROW_SELECTNONE: + /* Each process takes a slabs of rows, there are + no selections for the last process. */ + block[0] = 1; + block[1] = 1; + stride[0] = 1; + stride[1] = 1; + count[0] = ((mpi_rank >= MAX(1,(mpi_size-2)))?0:space_dim1); + count[1] = space_dim2; + start[0] = mpi_rank*count[0]; + start[1] = 0; + + break; + + case BYROW_SELECTUNBALANCE: + /* The first one-third of the number of processes only + select top half of the domain, The rest will select the bottom + half of the domain. */ + + block[0] = 1; + count[0] = 2; + stride[0] = space_dim1*mpi_size/4+1; + block[1] = space_dim2; + count[1] = 1; + start[1] = 0; + stride[1] = 1; + if((mpi_rank *3)<(mpi_size*2)) start[0] = mpi_rank; + else start[0] = 1 + space_dim1*mpi_size/2 + (mpi_rank-2*mpi_size/3); + break; + + case BYROW_SELECTINCHUNK: + /* Each process will only select one chunk */ + + block[0] = 1; + count[0] = 1; + start[0] = mpi_rank*space_dim1; + stride[0]= 1; + block[1] = space_dim2; + count[1] = 1; + stride[1]= 1; + start[1] = 0; + + break; + + default: + /* Unknown mode. Set it to cover the whole dataset. */ + block[0] = space_dim1*mpi_size; + block[1] = space_dim2; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = 0; + + break; + } + if (VERBOSE_MED){ + printf("start[]=(%lu,%lu), count[]=(%lu,%lu), stride[]=(%lu,%lu), block[]=(%lu,%lu), total datapoints=%lu\n", + (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1], + (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1], + (unsigned long)(block[0]*block[1]*count[0]*count[1])); + } +} + + +/* + * Fill the dataset with trivial data for testing. + * Assume dimension rank is 2. + */ +static void +ccdataset_fill(hsize_t start[], + hsize_t stride[], + hsize_t count[], + hsize_t block[], + DATATYPE * dataset, + int mem_selection) +{ + DATATYPE *dataptr = dataset; + DATATYPE *tmptr; + hsize_t i,j,k1,k2,k=0; + /* put some trivial data in the data_array */ + tmptr = dataptr; + + /* assign the disjoint block (two-dimensional)data array value + through the pointer */ + + for (k1 = 0; k1 < count[0]; k1++) { + for(i = 0; i < block[0]; i++) { + for(k2 = 0; k2 < count[1]; k2++) { + for(j = 0;j < block[1]; j++) { + + if (ALL != mem_selection) { + dataptr = tmptr + ((start[0]+k1*stride[0]+i)*space_dim2+ + start[1]+k2*stride[1]+j); + } + else { + dataptr = tmptr + k; + k++; + } + + *dataptr = (DATATYPE)(k1+k2+i+j); + } + } + } + } +} + +/* + * Print the first block of the content of the dataset. 
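+ * Assumes a rank-2 dataset; only the block[0] x block[1] elements of the
+ * first block (offset by start[]) are printed, one row per output line.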
+ */ +static void +ccdataset_print(hsize_t start[], + hsize_t block[], + DATATYPE * dataset) + +{ + DATATYPE *dataptr = dataset; + hsize_t i, j; + + /* print the column heading */ + printf("Print only the first block of the dataset\n"); + printf("%-8s", "Cols:"); + for (j=0; j < block[1]; j++){ + printf("%3lu ", (unsigned long)(start[1]+j)); + } + printf("\n"); + + /* print the slab data */ + for (i=0; i < block[0]; i++){ + printf("Row %2lu: ", (unsigned long)(i+start[0])); + for (j=0; j < block[1]; j++){ + printf("%03d ", *dataptr++); + } + printf("\n"); + } +} + +/* + * Print the content of the dataset. + */ +static int +ccdataset_vrfy(hsize_t start[], + hsize_t count[], + hsize_t stride[], + hsize_t block[], + DATATYPE *dataset, + DATATYPE *original, + int mem_selection) +{ + hsize_t i, j,k1,k2,k=0; + int vrfyerrs; + DATATYPE *dataptr,*oriptr; + + /* print it if VERBOSE_MED */ + if (VERBOSE_MED) { + printf("dataset_vrfy dumping:::\n"); + printf("start(%lu, %lu), count(%lu, %lu), stride(%lu, %lu), block(%lu, %lu)\n", + (unsigned long)start[0], (unsigned long)start[1], (unsigned long)count[0], (unsigned long)count[1], + (unsigned long)stride[0], (unsigned long)stride[1], (unsigned long)block[0], (unsigned long)block[1]); + printf("original values:\n"); + ccdataset_print(start, block, original); + printf("compared values:\n"); + ccdataset_print(start, block, dataset); + } + + vrfyerrs = 0; + + for (k1=0;k1 MAX_ERR_REPORT && !VERBOSE_MED) + printf("[more errors ...]\n"); + if (vrfyerrs) + printf("%d errors found in ccdataset_vrfy\n", vrfyerrs); + return(vrfyerrs); +} + +/* + * Example of using the parallel HDF5 library to create two datasets + * in one HDF5 file with collective parallel access support. + * The Datasets are of sizes (number-of-mpi-processes x dim0) x dim1. + * Each process controls only a slab of size dim0 x dim1 within each + * dataset. [Note: not so yet. Datasets are of sizes dim0xdim1 and + * each process controls a hyperslab within.] + */ + +static void +dataset_big_write(void) +{ + + hid_t xfer_plist; /* Dataset transfer properties list */ + hid_t sid; /* Dataspace ID */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset; + hid_t datatype; /* Datatype ID */ + hsize_t dims[RANK]; /* dataset dim sizes */ + hsize_t start[RANK]; /* for hyperslab setting */ + hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */ + hsize_t block[RANK]; /* for hyperslab setting */ + hsize_t *coords = NULL; + int i; + herr_t ret; /* Generic return value */ + hid_t fid; /* HDF5 file ID */ + hid_t acc_tpl; /* File access templates */ + hsize_t h; + size_t num_points; + B_DATATYPE * wdata; + + + /* allocate memory for data buffer */ + wdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE)); + VRFY((wdata != NULL), "wdata malloc succeeded"); + + /* setup file access template */ + acc_tpl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "H5P_FILE_ACCESS"); + H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL); + + /* create the file collectively */ + fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, acc_tpl); + VRFY((fid >= 0), "H5Fcreate succeeded"); + + /* Release file-access template */ + ret = H5Pclose(acc_tpl); + VRFY((ret >= 0), ""); + + + /* Each process takes a slabs of rows. 
*/ + printf("\nTesting Dataset1 write by ROW\n"); + /* Create a large dataset */ + dims[0] = bigcount; + dims[1] = mpi_size; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + dataset = H5Dcreate2(fid, DATASET1, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded"); + H5Sclose(sid); + + block[0] = dims[0]/mpi_size; + block[1] = dims[1]; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = mpi_rank*block[0]; + start[1] = 0; + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, block, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* fill the local slab with some trivial data */ + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + dataset_print(start, block, wdata); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* write data collectively */ + MESG("writeAll by Row"); + { + int j,k =0; + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", wdata[k]); + k++; + } + } + } + printf("\n"); + } + + ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, wdata); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded"); + + /* release all temporary handles. */ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + + + /* Each process takes a slabs of cols. 
*/ + printf("\nTesting Dataset2 write by COL\n"); + /* Create a large dataset */ + dims[0] = bigcount; + dims[1] = mpi_size; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + dataset = H5Dcreate2(fid, DATASET2, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded"); + H5Sclose(sid); + + block[0] = dims[0]; + block[1] = dims[1]/mpi_size; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = mpi_rank*block[1]; + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, block, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* fill the local slab with some trivial data */ + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + dataset_print(start, block, wdata); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* write data collectively */ + MESG("writeAll by Col"); + { + int j,k =0; + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", wdata[k]); + k++; + } + } + } + printf("\n"); + } + + ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, wdata); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded"); + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + + + /* ALL selection */ + printf("\nTesting Dataset3 write select ALL proc 0, NONE others\n"); + /* Create a large dataset */ + dims[0] = bigcount; + dims[1] = 1; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + dataset = H5Dcreate2(fid, DATASET3, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded"); + H5Sclose(sid); + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + if(MAINPROCESS) { + ret = H5Sselect_all(file_dataspace); + VRFY((ret >= 0), "H5Sset_all succeeded"); + } + else { + ret = H5Sselect_none(file_dataspace); + VRFY((ret >= 0), "H5Sset_none succeeded"); + } + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, dims, NULL); + VRFY((mem_dataspace >= 0), ""); + if(!MAINPROCESS) { + ret = H5Sselect_none(mem_dataspace); + VRFY((ret >= 0), "H5Sset_none succeeded"); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* fill the local slab with some trivial data */ + fill_datasets(start, dims, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + /* write data collectively */ + MESG("writeAll by process 0"); + { + int j,k =0; + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", wdata[k]); + k++; + } + } + } + printf("\n"); + } + + ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, wdata); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded"); + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + /* Point selection */ + printf("\nTesting Dataset4 write point selection\n"); + /* Create a large dataset */ + dims[0] = bigcount; + dims[1] = mpi_size * 4; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + dataset = H5Dcreate2(fid, DATASET4, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded"); + H5Sclose(sid); + + block[0] = dims[0]/2; + block[1] = 2; + stride[0] = dims[0]/2; + stride[1] = 2; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = dims[1]/mpi_size * mpi_rank; + + num_points = bigcount; + + coords = (hsize_t *)malloc(num_points * RANK * sizeof(hsize_t)); + VRFY((coords != NULL), "coords malloc succeeded"); + + set_coords (start, count, stride, block, num_points, coords, IN_ORDER); + /* create a file dataspace */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((ret >= 0), "H5Sselect_elements succeeded"); + + if(coords) free(coords); + + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + dataset_print(start, block, wdata); + } + + /* create a memory dataspace */ + mem_dataspace = H5Screate_simple (1, &bigcount, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, wdata); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded"); + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + /* Irregular selection */ + /* Need larger memory for data buffer */ + free(wdata); +#if 0 + wdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE)); + VRFY((wdata != NULL), "wdata malloc succeeded"); + + printf("\nTesting Dataset5 write irregular selection\n"); + /* Create a large dataset */ + dims[0] = bigcount/6; + dims[1] = mpi_size * 4; + sid = H5Screate_simple (RANK, dims, NULL); + VRFY((sid >= 0), "H5Screate_simple succeeded"); + dataset = H5Dcreate2(fid, DATASET5, H5T_NATIVE_LLONG, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dcreate2 succeeded"); + H5Sclose(sid); + + /* first select 1 col in this procs splice */ + block[0] = dims[0]; + block[1] = 1; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = mpi_rank * 4; + + /* create a file dataspace */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + + dims[1] = 4; + /* create a memory dataspace */ + mem_dataspace = H5Screate_simple (RANK, dims, NULL); + VRFY((mem_dataspace >= 0), ""); + + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + start[1] = 0; + ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* select every other row in the process splice and OR it with + the col selection to create an irregular selection */ + for(h=0 ; h= 0), "H5Sset_hyperslab succeeded"); + + start[1] = 0; + ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_OR, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + } + printf("Setting up for collective transfer\n"); + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), "H5Pcreate xfer succeeded"); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* fill the local slab with some trivial data */ + fill_datasets(start, dims, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + ret = H5Dwrite(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, wdata); + VRFY((ret >= 0), "H5Dwrite dataset1 succeeded"); + + /* release all temporary handles. */ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + free(wdata); +#endif + H5Fclose(fid); +} + +/* + * Example of using the parallel HDF5 library to read two datasets + * in one HDF5 file with collective parallel access support. + * The Datasets are of sizes (number-of-mpi-processes x dim0) x dim1. + * Each process controls only a slab of size dim0 x dim1 within each + * dataset. [Note: not so yet. Datasets are of sizes dim0xdim1 and + * each process controls a hyperslab within.] 
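+ * The routine reopens the file written by dataset_big_write(), reads
+ * DATASET1-DATASET4 back using hyperslab, ALL/NONE and point selections,
+ * and verifies the contents against the expected fill pattern
+ * (the irregular-selection DATASET5 case is currently compiled out).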
+ */ + +static void +dataset_big_read(void) +{ + hid_t fid; /* HDF5 file ID */ + hid_t acc_tpl; /* File access templates */ + hid_t xfer_plist; /* Dataset transfer properties list */ + hid_t file_dataspace; /* File dataspace ID */ + hid_t mem_dataspace; /* memory dataspace ID */ + hid_t dataset; + B_DATATYPE *rdata = NULL; /* data buffer */ + B_DATATYPE *wdata = NULL; /* expected data buffer */ + hsize_t dims[RANK]; /* dataset dim sizes */ + hsize_t start[RANK]; /* for hyperslab setting */ + hsize_t count[RANK], stride[RANK]; /* for hyperslab setting */ + hsize_t block[RANK]; /* for hyperslab setting */ + int i,j,k; + hsize_t h; + size_t num_points; + hsize_t *coords = NULL; + herr_t ret; /* Generic return value */ + + /* allocate memory for data buffer */ + rdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE)); + VRFY((rdata != NULL), "rdata malloc succeeded"); + wdata = (B_DATATYPE *)malloc(bigcount*sizeof(B_DATATYPE)); + VRFY((wdata != NULL), "wdata malloc succeeded"); + + memset(rdata, 0, bigcount*sizeof(B_DATATYPE)); + + /* setup file access template */ + acc_tpl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((acc_tpl >= 0), "H5P_FILE_ACCESS"); + H5Pset_fapl_mpio(acc_tpl, MPI_COMM_WORLD, MPI_INFO_NULL); + + /* open the file collectively */ + fid=H5Fopen(filename,H5F_ACC_RDONLY,acc_tpl); + VRFY((fid >= 0), "H5Fopen succeeded"); + + /* Release file-access template */ + ret = H5Pclose(acc_tpl); + VRFY((ret >= 0), ""); + + + printf("\nRead Testing Dataset1 by COL\n"); + dataset = H5Dopen2(fid, DATASET1, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen2 succeeded"); + + dims[0] = bigcount; + dims[1] = mpi_size; + /* Each process takes a slabs of cols. */ + block[0] = dims[0]; + block[1] = dims[1]/mpi_size; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = mpi_rank*block[1]; + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, block, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* fill dataset with test data */ + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pcreate xfer succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* read data collectively */ + ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, rdata); + VRFY((ret >= 0), "H5Dread dataset1 succeeded"); + + { + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", rdata[k]); + k++; + } + } + } + printf("\n"); + } + + /* verify the read data with original expected data */ + ret = verify_data(start, count, stride, block, rdata, wdata); + if(ret) {fprintf(stderr, "verify failed\n"); exit(1);} + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + + printf("\nRead Testing Dataset2 by ROW\n"); + memset(rdata, 0, bigcount*sizeof(B_DATATYPE)); + dataset = H5Dopen2(fid, DATASET2, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen2 succeeded"); + + dims[0] = bigcount; + dims[1] = mpi_size; + /* Each process takes a slabs of rows. */ + block[0] = dims[0]/mpi_size; + block[1] = dims[1]; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = mpi_rank*block[0]; + start[1] = 0; + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, block, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* fill dataset with test data */ + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pcreate xfer succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* read data collectively */ + ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, rdata); + VRFY((ret >= 0), "H5Dread dataset2 succeeded"); + + { + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", rdata[k]); + k++; + } + } + } + printf("\n"); + } + + /* verify the read data with original expected data */ + ret = verify_data(start, count, stride, block, rdata, wdata); + if(ret) {fprintf(stderr, "verify failed\n"); exit(1);} + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + + printf("\nRead Testing Dataset3 read select ALL proc 0, NONE others\n"); + memset(rdata, 0, bigcount*sizeof(B_DATATYPE)); + dataset = H5Dopen2(fid, DATASET3, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen2 succeeded"); + + dims[0] = bigcount; + dims[1] = 1; + + /* create a file dataspace independently */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + if(MAINPROCESS) { + ret = H5Sselect_all(file_dataspace); + VRFY((ret >= 0), "H5Sset_all succeeded"); + } + else { + ret = H5Sselect_none(file_dataspace); + VRFY((ret >= 0), "H5Sset_none succeeded"); + } + + /* create a memory dataspace independently */ + mem_dataspace = H5Screate_simple (RANK, dims, NULL); + VRFY((mem_dataspace >= 0), ""); + if(!MAINPROCESS) { + ret = H5Sselect_none(mem_dataspace); + VRFY((ret >= 0), "H5Sset_none succeeded"); + } + + /* fill dataset with test data */ + fill_datasets(start, dims, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pcreate xfer succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* read data collectively */ + ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, rdata); + VRFY((ret >= 0), "H5Dread dataset3 succeeded"); + + { + for (i=0; i < block[0]; i++){ + for (j=0; j < block[1]; j++){ + if(k < 10) { + printf("%lld ", rdata[k]); + k++; + } + } + } + printf("\n"); + } + + if(MAINPROCESS) { + /* verify the read data with original expected data */ + ret = verify_data(start, count, stride, block, rdata, wdata); + if(ret) {fprintf(stderr, "verify failed\n"); exit(1);} + } + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + printf("\nRead Testing Dataset4 with Point selection\n"); + dataset = H5Dopen2(fid, DATASET4, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen2 succeeded"); + + dims[0] = bigcount; + dims[1] = mpi_size * 4; + + block[0] = dims[0]/2; + block[1] = 2; + stride[0] = dims[0]/2; + stride[1] = 2; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = dims[1]/mpi_size * mpi_rank; + + fill_datasets(start, block, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + dataset_print(start, block, wdata); + } + + num_points = bigcount; + + coords = (hsize_t *)malloc(num_points * RANK * sizeof(hsize_t)); + VRFY((coords != NULL), "coords malloc succeeded"); + + set_coords (start, count, stride, block, num_points, coords, IN_ORDER); + /* create a file dataspace */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + ret = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((ret >= 0), "H5Sselect_elements succeeded"); + + if(coords) free(coords); + + /* create a memory dataspace */ + mem_dataspace = H5Screate_simple (1, &bigcount, NULL); + VRFY((mem_dataspace >= 0), ""); + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pcreate xfer succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* read data collectively */ + ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, rdata); + VRFY((ret >= 0), "H5Dread dataset1 succeeded"); + + ret = verify_data(start, count, stride, block, rdata, wdata); + if(ret) {fprintf(stderr, "verify failed\n"); exit(1);} + + /* release all temporary handles. 
*/ + H5Sclose(file_dataspace); + H5Sclose(mem_dataspace); + H5Pclose(xfer_plist); + ret = H5Dclose(dataset); + VRFY((ret >= 0), "H5Dclose1 succeeded"); + + printf("\nRead Testing Dataset5 with Irregular selection\n"); + /* Need larger memory for data buffer */ + free(wdata); + free(rdata); +#if 0 + wdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE)); + VRFY((wdata != NULL), "wdata malloc succeeded"); + rdata = (B_DATATYPE *)malloc(bigcount*4*sizeof(B_DATATYPE)); + VRFY((rdata != NULL), "rdata malloc succeeded"); + + dataset = H5Dopen2(fid, DATASET5, H5P_DEFAULT); + VRFY((dataset >= 0), "H5Dopen2 succeeded"); + + dims[0] = bigcount; + dims[1] = mpi_size * 4; + + /* first select 1 col in this proc splice */ + block[0] = dims[0]; + block[1] = 1; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = mpi_rank * 4; + + /* get file dataspace */ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), "H5Dget_space succeeded"); + + /* create a memory dataspace */ + dims[1] = 4; + mem_dataspace = H5Screate_simple (RANK, dims, NULL); + VRFY((mem_dataspace >= 0), ""); + + ret = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + start[1] = 0; + ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + /* select every other row in the process splice and OR it with + the col selection to create an irregular selection */ + for(h=0 ; h= 0), "H5Sset_hyperslab succeeded"); + + start[1] = 0; + ret = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_OR, start, stride, count, block); + VRFY((ret >= 0), "H5Sset_hyperslab succeeded"); + + //fprintf(stderr, "%d: %d - %d\n", mpi_rank, (int)h, (int)H5Sget_select_npoints(mem_dataspace)); + } + + /* set up the collective transfer properties list */ + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + ret = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((ret >= 0), "H5Pcreate xfer succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + ret = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((ret>= 0),"set independent IO collectively succeeded"); + } + + /* read data collectively */ + ret = H5Dread(dataset, H5T_NATIVE_LLONG, mem_dataspace, file_dataspace, + xfer_plist, rdata); + VRFY((ret >= 0), "H5Dread dataset1 succeeded"); + + /* fill dataset with test data */ + fill_datasets(start, dims, wdata); + MESG("data_array initialized"); + if(VERBOSE_MED){ + MESG("data_array created"); + } + + + + /* verify the read data with original expected data */ + block[0] = dims[0]; + block[1] = 1; + stride[0] = block[0]; + stride[1] = block[1]; + count[0] = 1; + count[1] = 1; + start[0] = 0; + start[1] = 0; + ret = verify_data(start, count, stride, block, rdata, wdata); + if(ret) {fprintf(stderr, "verify failed\n"); exit(1);} + + for(h=0 ; h= 0), "H5Dclose1 succeeded"); + + H5Fclose(fid); + + /* release data buffers */ + if(rdata) free(rdata); + if(wdata) free(wdata); +#endif +} /* dataset_large_readAll */ + + +/* + * Create the appropriate File access property list + */ +hid_t +create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type) +{ + hid_t ret_pl = -1; + herr_t ret; /* generic return value */ + int mpi_rank; /* mpi variables */ + + /* need the rank for error checking macros */ + MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank); + + ret_pl = H5Pcreate 
(H5P_FILE_ACCESS); + VRFY((ret_pl >= 0), "H5P_FILE_ACCESS"); + + if (l_facc_type == FACC_DEFAULT) + return (ret_pl); + + if (l_facc_type == FACC_MPIO){ + /* set Parallel access with communicator */ + ret = H5Pset_fapl_mpio(ret_pl, comm, info); + VRFY((ret >= 0), ""); + ret = H5Pset_all_coll_metadata_ops(ret_pl, TRUE); + VRFY((ret >= 0), ""); + ret = H5Pset_coll_metadata_write(ret_pl, TRUE); + VRFY((ret >= 0), ""); + return(ret_pl); + } + + if (l_facc_type == (FACC_MPIO | FACC_SPLIT)){ + hid_t mpio_pl; + + mpio_pl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((mpio_pl >= 0), ""); + /* set Parallel access with communicator */ + ret = H5Pset_fapl_mpio(mpio_pl, comm, info); + VRFY((ret >= 0), ""); + + /* setup file access template */ + ret_pl = H5Pcreate (H5P_FILE_ACCESS); + VRFY((ret_pl >= 0), ""); + /* set Parallel access with communicator */ + ret = H5Pset_fapl_split(ret_pl, ".meta", mpio_pl, ".raw", mpio_pl); + VRFY((ret >= 0), "H5Pset_fapl_split succeeded"); + H5Pclose(mpio_pl); + return(ret_pl); + } + + /* unknown file access types */ + return (ret_pl); +} + + +/*------------------------------------------------------------------------- + * Function: coll_chunk1 + * + * Purpose: Wrapper to test the collective chunk IO for regular JOINT + selection with a single chunk + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Unknown + * July 12th, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +/* ------------------------------------------------------------------------ + * Descriptions for the selection: One big singluar selection inside one chunk + * Two dimensions, + * + * dim1 = space_dim1(5760)*mpi_size + * dim2 = space_dim2(3) + * chunk_dim1 = dim1 + * chunk_dim2 = dim2 + * block = 1 for all dimensions + * stride = 1 for all dimensions + * count0 = space_dim1(5760) + * count1 = space_dim2(3) + * start0 = mpi_rank*space_dim1 + * start1 = 0 + * ------------------------------------------------------------------------ + */ + +void +coll_chunk1(void) +{ + if (MAINPROCESS) + printf("coll_chunk1\n"); + + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, HYPER, POINT, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, ALL, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, POINT, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, HYPER, OUT_OF_ORDER); + + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, ALL, IN_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, POINT, IN_ORDER); + coll_chunktest(filename, 1, BYROW_CONT, API_NONE, POINT, HYPER, IN_ORDER); +} + + +/*------------------------------------------------------------------------- + * Function: coll_chunk2 + * + * Purpose: Wrapper to test the collective chunk IO for regular DISJOINT + selection with a single chunk + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Unknown + * July 12th, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + + /* ------------------------------------------------------------------------ + * Descriptions for the selection: many disjoint selections inside one chunk + * Two dimensions, + * + * dim1 = space_dim1*mpi_size(5760) + * dim2 = space_dim2(3) + * chunk_dim1 = dim1 + * chunk_dim2 = dim2 + * block = 1 for all dimensions + * stride = 3 for all dimensions + * count0 = 
space_dim1/stride0(5760/3) + * count1 = space_dim2/stride(3/3 = 1) + * start0 = mpi_rank*space_dim1 + * start1 = 0 + * + * ------------------------------------------------------------------------ + */ +void +coll_chunk2(void) +{ + if (MAINPROCESS) + printf("coll_chunk2\n"); + + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, HYPER, POINT, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, ALL, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, POINT, OUT_OF_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, HYPER, OUT_OF_ORDER); + + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, ALL, IN_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, POINT, IN_ORDER); + coll_chunktest(filename, 1, BYROW_DISCONT, API_NONE, POINT, HYPER, IN_ORDER); +} + + +/*------------------------------------------------------------------------- + * Function: coll_chunk3 + * + * Purpose: Wrapper to test the collective chunk IO for regular JOINT + selection with at least number of 2*mpi_size chunks + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Unknown + * July 12th, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +/* ------------------------------------------------------------------------ + * Descriptions for the selection: one singular selection accross many chunks + * Two dimensions, Num of chunks = 2* mpi_size + * + * dim1 = space_dim1*mpi_size + * dim2 = space_dim2(3) + * chunk_dim1 = space_dim1 + * chunk_dim2 = dim2/2 + * block = 1 for all dimensions + * stride = 1 for all dimensions + * count0 = space_dim1 + * count1 = space_dim2(3) + * start0 = mpi_rank*space_dim1 + * start1 = 0 + * + * ------------------------------------------------------------------------ + */ + +void +coll_chunk3(void) +{ + if (MAINPROCESS) + printf("coll_chunk3\n"); + + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, HYPER, HYPER, OUT_OF_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, HYPER, POINT, OUT_OF_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, ALL, OUT_OF_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, POINT, OUT_OF_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, HYPER, OUT_OF_ORDER); + + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, ALL, IN_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, POINT, IN_ORDER); + coll_chunktest(filename, mpi_size, BYROW_CONT, API_NONE, POINT, HYPER, IN_ORDER); +} + + +//------------------------------------------------------------------------- +// Borrowed/Modified (slightly) from t_coll_chunk.c +/*------------------------------------------------------------------------- + * Function: coll_chunktest + * + * Purpose: The real testing routine for regular selection of collective + chunking storage + testing both write and read, + If anything fails, it may be read or write. There is no + separation test between read and write. + * + * Return: Success: 0 + * + * Failure: -1 + * + * Modifications: + * Remove invalid temporary property checkings for API_LINK_HARD and + * API_LINK_TRUE cases. 
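+ *
+ * Note: chunk_factor selects the number of chunks along dim0
+ * (chunk_dims[0] = dims[0]/chunk_factor), select_factor picks the
+ * per-process slab pattern set up by ccslab_set(), file_selection and
+ * mem_selection choose between HYPER/POINT/ALL selections, and mode
+ * controls whether point coordinates are generated IN_ORDER or
+ * OUT_OF_ORDER.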
+ * Programmer: Jonathan Kim + * Date: 2012-10-10 + * + * Programmer: Unknown + * July 12th, 2004 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ + +static void +coll_chunktest(const char* filename, + int chunk_factor, + int select_factor, + int api_option, + int file_selection, + int mem_selection, + int mode) +{ + hid_t file, dataset, file_dataspace, mem_dataspace; + hid_t acc_plist,xfer_plist,crp_plist; + + hsize_t dims[RANK], chunk_dims[RANK]; + int* data_array1 = NULL; + int* data_origin1 = NULL; + + hsize_t start[RANK],count[RANK],stride[RANK],block[RANK]; + +#ifdef H5_HAVE_INSTRUMENTED_LIBRARY + unsigned prop_value; +#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */ + + herr_t status; + MPI_Comm comm = MPI_COMM_WORLD; + MPI_Info info = MPI_INFO_NULL; + + size_t num_points; /* for point selection */ + hsize_t *coords = NULL; /* for point selection */ + hsize_t current_dims; /* for point selection */ + int i; + + /* Create the data space */ + + acc_plist = create_faccess_plist(comm,info,facc_type); + VRFY((acc_plist >= 0),""); + + file = H5Fcreate(filename,H5F_ACC_TRUNC,H5P_DEFAULT,acc_plist); + VRFY((file >= 0),"H5Fcreate succeeded"); + + status = H5Pclose(acc_plist); + VRFY((status >= 0),""); + + /* setup dimensionality object */ + dims[0] = space_dim1*mpi_size; + dims[1] = space_dim2; + + /* allocate memory for data buffer */ + data_array1 = (int *)HDmalloc(dims[0] * dims[1] * sizeof(int)); + VRFY((data_array1 != NULL), "data_array1 malloc succeeded"); + + /* set up dimensions of the slab this process accesses */ + ccslab_set(mpi_rank, mpi_size, start, count, stride, block, select_factor); + + /* set up the coords array selection */ + num_points = block[0] * block[1] * count[0] * count[1]; + coords = (hsize_t *)HDmalloc(num_points * RANK * sizeof(hsize_t)); + VRFY((coords != NULL), "coords malloc succeeded"); + point_set(start, count, stride, block, num_points, coords, mode); + + file_dataspace = H5Screate_simple(2, dims, NULL); + VRFY((file_dataspace >= 0), "file dataspace created succeeded"); + + if(ALL != mem_selection) { + mem_dataspace = H5Screate_simple(2, dims, NULL); + VRFY((mem_dataspace >= 0), "mem dataspace created succeeded"); + } + else { + current_dims = num_points; + mem_dataspace = H5Screate_simple (1, ¤t_dims, NULL); + VRFY((mem_dataspace >= 0), "mem_dataspace create succeeded"); + } + + crp_plist = H5Pcreate(H5P_DATASET_CREATE); + VRFY((crp_plist >= 0),""); + + /* Set up chunk information. */ + chunk_dims[0] = dims[0]/chunk_factor; + + /* to decrease the testing time, maintain bigger chunk size */ + (chunk_factor == 1) ? 
(chunk_dims[1] = space_dim2) : (chunk_dims[1] = space_dim2/2); + status = H5Pset_chunk(crp_plist, 2, chunk_dims); + VRFY((status >= 0),"chunk creation property list succeeded"); + + dataset = H5Dcreate2(file, DSET_COLLECTIVE_CHUNK_NAME, H5T_NATIVE_INT, + file_dataspace, H5P_DEFAULT, crp_plist, H5P_DEFAULT); + VRFY((dataset >= 0),"dataset created succeeded"); + + status = H5Pclose(crp_plist); + VRFY((status >= 0), ""); + + /*put some trivial data in the data array */ + ccdataset_fill(start, stride, count,block, data_array1, mem_selection); + + MESG("data_array initialized"); + + switch (file_selection) { + case HYPER: + status = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((status >= 0),"hyperslab selection succeeded"); + break; + + case POINT: + if (num_points) { + status = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((status >= 0),"Element selection succeeded"); + } + else { + status = H5Sselect_none(file_dataspace); + VRFY((status >= 0),"none selection succeeded"); + } + break; + + case ALL: + status = H5Sselect_all(file_dataspace); + VRFY((status >= 0), "H5Sselect_all succeeded"); + break; + } + + switch (mem_selection) { + case HYPER: + status = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((status >= 0),"hyperslab selection succeeded"); + break; + + case POINT: + if (num_points) { + status = H5Sselect_elements(mem_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((status >= 0),"Element selection succeeded"); + } + else { + status = H5Sselect_none(mem_dataspace); + VRFY((status >= 0),"none selection succeeded"); + } + break; + + case ALL: + status = H5Sselect_all(mem_dataspace); + VRFY((status >= 0), "H5Sselect_all succeeded"); + break; + } + + /* set up the collective transfer property list */ + xfer_plist = H5Pcreate(H5P_DATASET_XFER); + VRFY((xfer_plist >= 0), ""); + + status = H5Pset_dxpl_mpio(xfer_plist, H5FD_MPIO_COLLECTIVE); + VRFY((status>= 0),"MPIO collective transfer property succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + status = H5Pset_dxpl_mpio_collective_opt(xfer_plist, H5FD_MPIO_INDIVIDUAL_IO); + VRFY((status>= 0),"set independent IO collectively succeeded"); + } + + switch(api_option){ + case API_LINK_HARD: + status = H5Pset_dxpl_mpio_chunk_opt(xfer_plist,H5FD_MPIO_CHUNK_ONE_IO); + VRFY((status>= 0),"collective chunk optimization succeeded"); + break; + + case API_MULTI_HARD: + status = H5Pset_dxpl_mpio_chunk_opt(xfer_plist,H5FD_MPIO_CHUNK_MULTI_IO); + VRFY((status>= 0),"collective chunk optimization succeeded "); + break; + + case API_LINK_TRUE: + status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,2); + VRFY((status>= 0),"collective chunk optimization set chunk number succeeded"); + break; + + case API_LINK_FALSE: + status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,6); + VRFY((status>= 0),"collective chunk optimization set chunk number succeeded"); + break; + + case API_MULTI_COLL: + status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,8);/* make sure it is using multi-chunk IO */ + VRFY((status>= 0),"collective chunk optimization set chunk number succeeded"); + status = H5Pset_dxpl_mpio_chunk_opt_ratio(xfer_plist,50); + VRFY((status>= 0),"collective chunk optimization set chunk ratio succeeded"); + break; + + case API_MULTI_IND: + status = H5Pset_dxpl_mpio_chunk_opt_num(xfer_plist,8);/* make sure it is using multi-chunk IO */ + VRFY((status>= 0),"collective chunk optimization set chunk number succeeded"); + status = 
H5Pset_dxpl_mpio_chunk_opt_ratio(xfer_plist,100); + VRFY((status>= 0),"collective chunk optimization set chunk ratio succeeded"); + break; + + default: + ; + } + +#ifdef H5_HAVE_INSTRUMENTED_LIBRARY + if(facc_type == FACC_MPIO) { + switch(api_option) { + case API_LINK_HARD: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_HARD_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + case API_MULTI_HARD: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + case API_LINK_TRUE: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + case API_LINK_FALSE: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + case API_MULTI_COLL: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + case API_MULTI_IND: + prop_value = H5D_XFER_COLL_CHUNK_DEF; + status = H5Pinsert2(xfer_plist, H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME, H5D_XFER_COLL_CHUNK_SIZE, &prop_value, + NULL, NULL, NULL, NULL, NULL, NULL); + VRFY((status >= 0),"testing property list inserted succeeded"); + break; + + default: + ; + } + } +#endif + + /* write data collectively */ + status = H5Dwrite(dataset, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + VRFY((status >= 0),"dataset write succeeded"); + +#ifdef H5_HAVE_INSTRUMENTED_LIBRARY + if(facc_type == FACC_MPIO) { + switch(api_option){ + case API_LINK_HARD: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_HARD_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO directly succeeded"); + break; + + case API_MULTI_HARD: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_HARD_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set MULTI-CHUNK COLLECTIVE IO optimization succeeded"); + break; + + case API_LINK_TRUE: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_TRUE_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set LINK COLLECTIVE IO succeeded"); + break; + + case API_LINK_FALSE: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_LINK_NUM_FALSE_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set LINK IO transferring to multi-chunk IO succeeded"); + break; + + case API_MULTI_COLL: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_RATIO_COLL_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set MULTI-CHUNK 
COLLECTIVE IO with optimization succeeded"); + break; + + case API_MULTI_IND: + status = H5Pget(xfer_plist,H5D_XFER_COLL_CHUNK_MULTI_RATIO_IND_NAME,&prop_value); + VRFY((status >= 0),"testing property list get succeeded"); + VRFY((prop_value == 0),"API to set MULTI-CHUNK IO transferring to independent IO succeeded"); + break; + + default: + ; + } + } +#endif + + status = H5Dclose(dataset); + VRFY((status >= 0),""); + + status = H5Pclose(xfer_plist); + VRFY((status >= 0),"property list closed"); + + status = H5Sclose(file_dataspace); + VRFY((status >= 0),""); + + status = H5Sclose(mem_dataspace); + VRFY((status >= 0),""); + + + status = H5Fclose(file); + VRFY((status >= 0),""); + + if (data_array1) HDfree(data_array1); + + /* Use collective read to verify the correctness of collective write. */ + + /* allocate memory for data buffer */ + data_array1 = (int *)HDmalloc(dims[0]*dims[1]*sizeof(int)); + VRFY((data_array1 != NULL), "data_array1 malloc succeeded"); + + /* allocate memory for data buffer */ + data_origin1 = (int *)HDmalloc(dims[0]*dims[1]*sizeof(int)); + VRFY((data_origin1 != NULL), "data_origin1 malloc succeeded"); + + acc_plist = create_faccess_plist(comm, info, facc_type); + VRFY((acc_plist >= 0),"MPIO creation property list succeeded"); + + file = H5Fopen(filename,H5F_ACC_RDONLY,acc_plist); + VRFY((file >= 0),"H5Fcreate succeeded"); + + status = H5Pclose(acc_plist); + VRFY((status >= 0),""); + + /* open the collective dataset*/ + dataset = H5Dopen2(file, DSET_COLLECTIVE_CHUNK_NAME, H5P_DEFAULT); + VRFY((dataset >= 0), ""); + + /* set up dimensions of the slab this process accesses */ + ccslab_set(mpi_rank, mpi_size, start, count, stride, block, select_factor); + + /* obtain the file and mem dataspace*/ + file_dataspace = H5Dget_space (dataset); + VRFY((file_dataspace >= 0), ""); + + if (ALL != mem_selection) { + mem_dataspace = H5Dget_space (dataset); + VRFY((mem_dataspace >= 0), ""); + } + else { + current_dims = num_points; + mem_dataspace = H5Screate_simple (1, ¤t_dims, NULL); + VRFY((mem_dataspace >= 0), "mem_dataspace create succeeded"); + } + + switch (file_selection) { + case HYPER: + status = H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((status >= 0),"hyperslab selection succeeded"); + break; + + case POINT: + if (num_points) { + status = H5Sselect_elements(file_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((status >= 0),"Element selection succeeded"); + } + else { + status = H5Sselect_none(file_dataspace); + VRFY((status >= 0),"none selection succeeded"); + } + break; + + case ALL: + status = H5Sselect_all(file_dataspace); + VRFY((status >= 0), "H5Sselect_all succeeded"); + break; + } + + switch (mem_selection) { + case HYPER: + status = H5Sselect_hyperslab(mem_dataspace, H5S_SELECT_SET, start, stride, count, block); + VRFY((status >= 0),"hyperslab selection succeeded"); + break; + + case POINT: + if (num_points) { + status = H5Sselect_elements(mem_dataspace, H5S_SELECT_SET, num_points, coords); + VRFY((status >= 0),"Element selection succeeded"); + } + else { + status = H5Sselect_none(mem_dataspace); + VRFY((status >= 0),"none selection succeeded"); + } + break; + + case ALL: + status = H5Sselect_all(mem_dataspace); + VRFY((status >= 0), "H5Sselect_all succeeded"); + break; + } + + /* fill dataset with test data */ + ccdataset_fill(start, stride,count,block, data_origin1, mem_selection); + xfer_plist = H5Pcreate (H5P_DATASET_XFER); + VRFY((xfer_plist >= 0),""); + + status = H5Pset_dxpl_mpio(xfer_plist, 
H5FD_MPIO_COLLECTIVE); + VRFY((status>= 0),"MPIO collective transfer property succeeded"); + if(dxfer_coll_type == DXFER_INDEPENDENT_IO) { + status = H5Pset_dxpl_mpio_collective_opt(xfer_plist,H5FD_MPIO_INDIVIDUAL_IO); + VRFY((status>= 0),"set independent IO collectively succeeded"); + } + + status = H5Dread(dataset, H5T_NATIVE_INT, mem_dataspace, file_dataspace, + xfer_plist, data_array1); + VRFY((status >=0),"dataset read succeeded"); + + /* verify the read data with original expected data */ + status = ccdataset_vrfy(start, count, stride, block, data_array1, data_origin1, mem_selection); + if (status) nerrors++; + + status = H5Pclose(xfer_plist); + VRFY((status >= 0),"property list closed"); + + /* close dataset collectively */ + status=H5Dclose(dataset); + VRFY((status >= 0), "H5Dclose"); + + /* release all IDs created */ + status = H5Sclose(file_dataspace); + VRFY((status >= 0),"H5Sclose"); + + status = H5Sclose(mem_dataspace); + VRFY((status >= 0),"H5Sclose"); + + /* close the file collectively */ + status = H5Fclose(file); + VRFY((status >= 0),"H5Fclose"); + + /* release data buffers */ + if(coords) HDfree(coords); + if(data_array1) HDfree(data_array1); + if(data_origin1) HDfree(data_origin1); + +} + + + + + +int main(int argc, char **argv) +{ + + hsize_t newsize = 1048576; + hsize_t oldsize = H5S_mpio_set_bigio_count(newsize); + if (newsize != oldsize) { + bigcount = newsize * 2; + } + + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + + dataset_big_write(); + MPI_Barrier(MPI_COMM_WORLD); + + dataset_big_read(); + MPI_Barrier(MPI_COMM_WORLD); + + oldsize = H5S_mpio_set_bigio_count(16384); + + coll_chunk1(); + MPI_Barrier(MPI_COMM_WORLD); + coll_chunk2(); + MPI_Barrier(MPI_COMM_WORLD); + coll_chunk3(); + MPI_Barrier(MPI_COMM_WORLD); + + MPI_Finalize(); + + return 0; +} + -- cgit v0.12 From 64d33e5e6e4b4270a3982c1be384cb41a0aa4c3b Mon Sep 17 00:00:00 2001 From: Richard Warren Date: Wed, 5 Jul 2017 16:19:57 -0400 Subject: Commited changes to the development branch here to allow a pull request to be published --- src/H5Smpio.c | 504 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 458 insertions(+), 46 deletions(-) diff --git a/src/H5Smpio.c b/src/H5Smpio.c index c24c455..7319a80 100644 --- a/src/H5Smpio.c +++ b/src/H5Smpio.c @@ -33,7 +33,7 @@ #include "H5Oprivate.h" /* Object headers */ #include "H5Pprivate.h" /* Property lists */ #include "H5Spkg.h" /* Dataspaces */ -#include "H5VMprivate.h" /* Vector and array functions */ +#include "H5VMprivate.h" /* Vector and array functions */ #ifdef H5_HAVE_PARALLEL @@ -55,9 +55,42 @@ static herr_t H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type, int *count, hbool_t *is_derived_type); static herr_t H5S_obtain_datatype(const hsize_t down[], H5S_hyper_span_t* span, const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size); +static herr_t H5S_mpio_create_large_type (hsize_t, MPI_Aint, MPI_Datatype , MPI_Datatype *); + #define H5S_MPIO_INITIAL_ALLOC_COUNT 256 +#define TWO_GIG_LIMIT 2147483648 + +#ifndef H5S_MAX_MPI_COUNT +#define H5S_MAX_MPI_COUNT 536870911 /* (2^29)-1 */ +#endif + +static hsize_t bigio_count = H5S_MAX_MPI_COUNT; + +/*------------------------------------------------------------------------- + * Function: H5S_mpio_set_bigio_count + * + * Purpose: Allow us to programatically change the switch point + * when we utilize derived datatypes. 
This is of + * particular interest for allowing nightly testing + * + * Return: the current/previous value of bigio_count. + * + * Programmer: Richard Warren, March 10, 2017 + * + *------------------------------------------------------------------------- + */ +hsize_t +H5S_mpio_set_bigio_count(hsize_t new_count) +{ + hsize_t orig_count = bigio_count; + if ((new_count > 0) && (new_count < TWO_GIG_LIMIT)) { + bigio_count = new_count; + } + return orig_count; +} + /*------------------------------------------------------------------------- * Function: H5S_mpio_all_type @@ -72,6 +105,11 @@ static herr_t H5S_obtain_datatype(const hsize_t down[], H5S_hyper_span_t* span, * *is_derived_type 0 if MPI primitive type, 1 if derived * * Programmer: rky 980813 + * Modifications: + * Mohamad Chaarawi + * Adding support for large datatypes (beyond the limit of a + * 32 bit integer. + * * *------------------------------------------------------------------------- */ @@ -95,11 +133,22 @@ H5S_mpio_all_type(const H5S_t *space, size_t elmt_size, H5_CHECKED_ASSIGN(nelmts, hsize_t, snelmts, hssize_t); total_bytes = (hsize_t)elmt_size * nelmts; - - /* fill in the return values */ - *new_type = MPI_BYTE; - H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t); - *is_derived_type = FALSE; + /* Verify that the size can be expressed as a 32 bit integer */ + if(bigio_count >= total_bytes) { + /* fill in the return values */ + *new_type = MPI_BYTE; + H5_CHECKED_ASSIGN(*count, int, total_bytes, hsize_t); + *is_derived_type = FALSE; + } + else { + /* Create a LARGE derived datatype for this transfer */ + if (H5S_mpio_create_large_type (total_bytes, 0, MPI_BYTE, new_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't create a large datatype from the all selection") + } + *count = 1; + *is_derived_type = TRUE; + } done: FUNC_LEAVE_NOAPI(ret_value) @@ -167,27 +216,103 @@ H5S_mpio_create_point_datatype (size_t elmt_size, hsize_t num_points, HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) elmt_type_created = TRUE; + /* Check whether standard or BIGIO processing will be employeed */ + if(bigio_count >= num_points) { #if MPI_VERSION >= 3 - /* Create an MPI datatype for the whole point selection */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) + /* Create an MPI datatype for the whole point selection */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed_block((int)num_points, 1, disp, elmt_type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) #else - /* Allocate block sizes for MPI datatype call */ - if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points))) - HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + /* Allocate block sizes for MPI datatype call */ + if(NULL == (blocks = (int *)H5MM_malloc(sizeof(int) * num_points))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") - for(u = 0; u < num_points; u++) - blocks[u] = 1; + for(u = 0; u < num_points; u++) + blocks[u] = 1; - /* Create an MPI datatype for the whole point selection */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) + /* Create an MPI datatype for the whole point selection */ + if(MPI_SUCCESS != (mpi_code = 
MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) #endif - /* Commit MPI datatype for later use */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + /* Commit MPI datatype for later use */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + } + else { + /* use LARGE_DATATYPE:: + * We'll create an hindexed_block type for every 2G point count and then combine + * those and any remaining points into a single large datatype. + */ + int total_types, i; + int remaining_points; + int num_big_types; + hsize_t leftover; + + int *inner_blocks; + MPI_Aint *inner_disps; + MPI_Datatype *inner_types = NULL; + + /* Calculate how many Big MPI datatypes are needed to represent the buffer */ + num_big_types = (int)(num_points/bigio_count); + + leftover = (hsize_t)num_points - (hsize_t)num_big_types * (hsize_t)bigio_count; + H5_CHECKED_ASSIGN(remaining_points, int, leftover, hsize_t); + + total_types = (int)(remaining_points) ? (num_big_types + 1) : num_big_types; + + /* Allocate array if MPI derived types needed */ + if(NULL == (inner_types = (MPI_Datatype *)H5MM_malloc((sizeof(MPI_Datatype) * (size_t)total_types)))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + if(NULL == (inner_blocks = (int *)H5MM_malloc(sizeof(int) * (size_t)total_types))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + if(NULL == (inner_disps = (MPI_Aint *)H5MM_malloc(sizeof(MPI_Aint) * (size_t)total_types))) + HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate array of blocks") + + for(i=0 ; i= elmt_size) { + /* Use a single MPI datatype that has a 32 bit size */ + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &inner_type))) HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + else { + /* Create the compound datatype for this operation (> 2GB) */ + if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &inner_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't ccreate a large inner datatype in hyper selection") + } + } /******************************************************* * Construct the type by walking the hyperslab dims @@ -645,30 +790,93 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, *******************************************************/ for(i = ((int)rank) - 1; i >= 0; --i) { #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n" - "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n", - FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]); + if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "%s: Dimension i=%d \n" + "start=%Hd count=%Hu block=%Hu stride=%Hu, xtent=%Hu max_xtent=%d\n", + FUNC, i, d[i].start, d[i].count, d[i].block, d[i].strid, d[i].xtent, max_xtent[i]); #endif #ifdef H5S_DEBUG - if(H5DEBUG(S)) - HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i); + if(H5DEBUG(S)) + HDfprintf(H5DEBUG(S), "%s: i=%d Making vector-type \n", FUNC,i); #endif /**************************************** * Build vector type of the selection. 
****************************************/ - mpi_code = MPI_Type_vector((int)(d[i].count), /* count */ - (int)(d[i].block), /* blocklength */ - (int)(d[i].strid), /* stride */ - inner_type, /* old type */ - &outer_type); /* new type */ + if (bigio_count >= d[i].count && + bigio_count >= d[i].block && + bigio_count >= d[i].strid) { + + /* All the parameters fit into 32 bit integers so create the vector type normally */ + mpi_code = MPI_Type_vector((int)(d[i].count), /* count */ + (int)(d[i].block), /* blocklength */ + (int)(d[i].strid), /* stride */ + inner_type, /* old type */ + &outer_type); /* new type */ + + MPI_Type_free(&inner_type); + if(mpi_code != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code) + } + else { + /* Things get a bit more complicated and require LARGE_DATATYPE processing + * There are two MPI datatypes that need to be created: + * 1) an internal contiguous block; and + * 2) a collection of elements where an element is a contiguous block(1). + * Remember that the input arguments to the MPI-IO functions use integer + * values to represent element counts. We ARE allowed however, in the + * more recent MPI implementations to use constructed datatypes whereby + * the total number of bytes in a transfer could be : + * (2GB-1)number_of_blocks * the_datatype_extent. + */ + + MPI_Aint stride_in_bytes, inner_extent; + MPI_Datatype block_type; + + /* create a contiguous datatype inner_type x number of BLOCKS. + * Again we need to check that the number of BLOCKS can fit into + * a 32 bit integer */ + if (bigio_count < d[i].block) { + if (H5S_mpio_create_large_type(d[i].block, 0, inner_type, + &block_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't ccreate a large block datatype in hyper selection") + } + } + else { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)d[i].block, + inner_type, + &block_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } - MPI_Type_free(&inner_type); - if(mpi_code != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "couldn't create MPI vector type", mpi_code) + MPI_Type_extent (inner_type, &inner_extent); + stride_in_bytes = inner_extent * (MPI_Aint)d[i].strid; - /**************************************** + /* If the element count is larger than what a 32 bit integer can hold, + * we call the large type creation function to handle that + */ + if (bigio_count < d[i].count) { + if (H5S_mpio_create_large_type (d[i].count, stride_in_bytes, block_type, + &outer_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't create a large outer datatype in hyper selection") + } + } + /* otherwise a regular create_hvector will do */ + else { + mpi_code = MPI_Type_create_hvector((int)d[i].count, /* count */ + 1, /* blocklength */ + stride_in_bytes, /* stride in bytes*/ + block_type, /* old type */ + &outer_type); /* new type */ + if(MPI_SUCCESS != mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + MPI_Type_free(&block_type); + MPI_Type_free(&inner_type); + } + /**************************************** * Then build the dimension type as (start, vector type, xtent). ****************************************/ /* calculate start and extent values of this dimension */ @@ -752,6 +960,10 @@ done: * * Programmer: kyang * + * Modifications: + * Mohamad Chaarawi + * Adding support for large datatypes (beyond the limit of a + * 32 bit integer. 
*------------------------------------------------------------------------- */ static herr_t @@ -774,8 +986,17 @@ H5S_mpio_span_hyper_type(const H5S_t *space, size_t elmt_size, HDassert(space->select.sel_info.hslab->span_lst->head); /* Create the base type for an element */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + if (bigio_count >= elmt_size) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)elmt_size, MPI_BYTE, &elmt_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + else { + if (H5S_mpio_create_large_type (elmt_size, 0, MPI_BYTE, &elmt_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't create a large element datatype in span_hyper selection") + } + } elmt_type_is_derived = TRUE; /* Compute 'down' sizes for each dimension */ @@ -821,14 +1042,15 @@ static herr_t H5S_obtain_datatype(const hsize_t *down, H5S_hyper_span_t *span, const MPI_Datatype *elmt_type, MPI_Datatype *span_type, size_t elmt_size) { - size_t alloc_count; /* Number of span tree nodes allocated at this level */ - size_t outercount; /* Number of span tree nodes at this level */ + size_t alloc_count = 0; /* Number of span tree nodes allocated at this level */ + size_t outercount = 0; /* Number of span tree nodes at this level */ MPI_Datatype *inner_type = NULL; hbool_t inner_types_freed = FALSE; /* Whether the inner_type MPI datatypes have been freed */ hbool_t span_type_valid = FALSE; /* Whether the span_type MPI datatypes is valid */ + hbool_t large_block = FALSE; /* Wether the block length is larger than 32 bit integer */ int *blocklen = NULL; MPI_Aint *disp = NULL; - H5S_hyper_span_t *tspan; /* Temporary pointer to span tree node */ + H5S_hyper_span_t *tspan = NULL; /* Temporary pointer to span tree node */ int mpi_code; /* MPI return status code */ herr_t ret_value = SUCCEED; /* Return value */ @@ -870,14 +1092,70 @@ H5S_obtain_datatype(const hsize_t *down, H5S_hyper_span_t *span, disp[outercount] = (MPI_Aint)elmt_size * tspan->low; H5_CHECK_OVERFLOW(tspan->nelem, hsize_t, int) blocklen[outercount] = (int)tspan->nelem; - tspan = tspan->next; + + if (bigio_count < blocklen[outercount]) { + large_block = TRUE; /* at least one block type is large, so set this flag to true */ + } + outercount++; } /* end while */ - if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, span_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) - span_type_valid = TRUE; + /* Everything fits into integers, so cast them and use hindexed */ + if (bigio_count >= outercount && large_block == FALSE) { + + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)outercount, blocklen, disp, *elmt_type, span_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) + span_type_valid = TRUE; + } + else { /* LARGE_DATATYPE:: Something doesn't fit into a 32 bit integer */ + size_t i; + + for (i=0 ; i bigio_count) { + if (H5S_mpio_create_large_type (blocklen[i], 0, *elmt_type, &temp_type) < 0) { + HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL, + "couldn't create a large element datatype in span_hyper selection") + } + } + else { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous((int)blocklen[i], + *elmt_type, + &temp_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + + /* combine the current datatype that is created with this current block type */ + if (0 == i) 
{ /* first iteration, there is no combined datatype yet */ + *span_type = temp_type; + } + else { + int bl[2] = {1,1}; + MPI_Aint ds[2] = {disp[i-1],disp[i]}; + MPI_Datatype dt[2] = {*span_type, temp_type}; + + if (MPI_SUCCESS != (mpi_code = MPI_Type_create_struct (2, /* count */ + bl, /* blocklength */ + ds, /* stride in bytes*/ + dt, /* old type */ + &outer_type))){ /* new type */ + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) + } + *span_type = outer_type; + } + + if (outer_type != MPI_DATATYPE_NULL) + MPI_Type_free(&outer_type); + /* temp_type shouldn't be freed here... + * Note that we have simply copied it above (not MPI_Type_dup) + * into the 'span_type' argument of the caller. + * The caller needs to deal with it there! + */ + } + } /* end (LARGE_DATATYPE::) */ + } /* end if */ else { size_t u; /* Local index variable */ @@ -1091,5 +1369,139 @@ H5S_mpio_space_type(const H5S_t *space, size_t elmt_size, MPI_Datatype *new_type done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5S_mpio_space_type() */ + + +/*------------------------------------------------------------------------- + * Function: H5S_mpio_create_large_type + * + * Purpose: Create a large datatype of size larger than what a 32 bit integer + * can hold. + * + * Return: non-negative on success, negative on failure. + * + * *new_type the new datatype created + * + * Programmer: Mohamad Chaarawi + * + *------------------------------------------------------------------------- + */ +static herr_t H5S_mpio_create_large_type (hsize_t num_elements, + MPI_Aint stride_bytes, + MPI_Datatype old_type, + MPI_Datatype *new_type) +{ + int num_big_types; /* num times the 2G datatype will be repeated */ + int remaining_bytes; /* the number of bytes left that can be held in an int value */ + hsize_t leftover; + int block_len[2]; + int mpi_code; /* MPI return code */ + MPI_Datatype inner_type, outer_type, leftover_type, type[2]; + MPI_Aint disp[2], old_extent; + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + /* Calculate how many Big MPI datatypes are needed to represent the buffer */ + num_big_types = (int)(num_elements/bigio_count); + leftover = num_elements - num_big_types * (hsize_t)bigio_count; + H5_CHECKED_ASSIGN(remaining_bytes, int, leftover, hsize_t); + + /* Create a contiguous datatype of size equal to the largest + * number that a 32 bit integer can hold x size of old type. 
+ * If the displacement is 0, then the type is contiguous, otherwise + * use type_hvector to create the type with the displacement provided + */ + if (0 == stride_bytes) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(bigio_count, + old_type, + &inner_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + else { + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (bigio_count, + 1, + stride_bytes, + old_type, + &inner_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + + /* Create a contiguous datatype of the buffer (minus the remaining < 2GB part) + * If a stride is present, use hvector type + */ + if (0 == stride_bytes) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(num_big_types, + inner_type, + &outer_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + else { + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector (num_big_types, + 1, + stride_bytes, + inner_type, + &outer_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + + MPI_Type_free(&inner_type); + + /* If there is a remaining part create a contiguous/vector datatype and then + * use a struct datatype to encapsulate everything. + */ + if(remaining_bytes) { + if (stride_bytes == 0) { + if(MPI_SUCCESS != (mpi_code = MPI_Type_contiguous (remaining_bytes, + old_type, + &leftover_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + else { + if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hvector + ((int)(num_elements - (hsize_t)num_big_types*bigio_count), + 1, + stride_bytes, + old_type, + &leftover_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + } + + MPI_Type_extent (old_type, &old_extent); + + /* Set up the arguments for MPI_Type_struct constructor */ + type[0] = outer_type; + type[1] = leftover_type; + block_len[0] = 1; + block_len[1] = 1; + disp[0] = 0; + disp[1] = (old_extent+stride_bytes)*num_big_types*(MPI_Aint)bigio_count; + + if(MPI_SUCCESS != (mpi_code = + MPI_Type_create_struct(2, block_len, disp, type, new_type))) { + HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + } + + MPI_Type_free(&outer_type); + MPI_Type_free(&leftover_type); + } + else { + /* There are no remaining bytes so just set the new type to + * the outer type created */ + *new_type = outer_type; + } + + if(MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code) + +done: + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5S_mpio_create_large_type */ + #endif /* H5_HAVE_PARALLEL */ -- cgit v0.12 From 0c4c562cc583fd814f26ba652cbcf82dc6d33cac Mon Sep 17 00:00:00 2001 From: Richard Warren Date: Mon, 10 Jul 2017 16:17:26 -0400 Subject: Include code fixes and additional modifications pointed out by code reviewers --- MANIFEST | 1 + src/H5Smpio.c | 12 ++++----- testpar/CMakeLists.txt | 1 + testpar/Makefile.am | 2 +- testpar/t_bigio.c | 72 ++++++++++++++++++++++++++++++++++++++++++++------ 5 files changed, 73 insertions(+), 15 deletions(-) diff --git a/MANIFEST b/MANIFEST index 475b674..27f38be 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1222,6 +1222,7 @@ ./testpar/COPYING ./testpar/Makefile.am +./testpar/t_bigio.c ./testpar/t_cache.c ./testpar/t_cache_image.c ./testpar/t_chunk_alloc.c diff --git a/src/H5Smpio.c b/src/H5Smpio.c index 7319a80..46f7a59 100644 --- a/src/H5Smpio.c +++ b/src/H5Smpio.c @@ -232,7 +232,7 @@ H5S_mpio_create_point_datatype (size_t elmt_size, hsize_t num_points, /* Create an MPI 
datatype for the whole point selection */ if(MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_points, blocks, disp, elmt_type, new_type))) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_indexed_block failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code) #endif /* Commit MPI datatype for later use */ @@ -871,7 +871,7 @@ H5S_mpio_hyper_type(const H5S_t *space, size_t elmt_size, block_type, /* old type */ &outer_type); /* new type */ if(MPI_SUCCESS != mpi_code) - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) } MPI_Type_free(&block_type); MPI_Type_free(&inner_type); @@ -1424,7 +1424,7 @@ static herr_t H5S_mpio_create_large_type (hsize_t num_elements, stride_bytes, old_type, &inner_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) } } @@ -1444,7 +1444,7 @@ static herr_t H5S_mpio_create_large_type (hsize_t num_elements, stride_bytes, inner_type, &outer_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) } } @@ -1468,7 +1468,7 @@ static herr_t H5S_mpio_create_large_type (hsize_t num_elements, stride_bytes, old_type, &leftover_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code) } } @@ -1484,7 +1484,7 @@ static herr_t H5S_mpio_create_large_type (hsize_t num_elements, if(MPI_SUCCESS != (mpi_code = MPI_Type_create_struct(2, block_len, disp, type, new_type))) { - HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code) } MPI_Type_free(&outer_type); diff --git a/testpar/CMakeLists.txt b/testpar/CMakeLists.txt index 298d326..e994b65 100644 --- a/testpar/CMakeLists.txt +++ b/testpar/CMakeLists.txt @@ -43,6 +43,7 @@ ENDMACRO (ADD_H5P_EXE file) set (H5P_TESTS t_mpi + t_bigio t_cache t_pflush1 t_pflush2 diff --git a/testpar/Makefile.am b/testpar/Makefile.am index b87c1df..7029bd5 100644 --- a/testpar/Makefile.am +++ b/testpar/Makefile.am @@ -23,7 +23,7 @@ AM_CPPFLAGS+=-I$(top_srcdir)/src -I$(top_srcdir)/test # Test programs. These are our main targets. # -TEST_PROG_PARA=t_mpi testphdf5 t_cache t_cache_image t_pflush1 t_pflush2 t_pshutdown t_prestart t_init_term t_shapesame +TEST_PROG_PARA=t_mpi t_bigio testphdf5 t_cache t_cache_image t_pflush1 t_pflush2 t_pshutdown t_prestart t_init_term t_shapesame check_PROGRAMS = $(TEST_PROG_PARA) diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c index 2f80e45..830ea54 100644 --- a/testpar/t_bigio.c +++ b/testpar/t_bigio.c @@ -2060,13 +2060,62 @@ coll_chunktest(const char* filename, +/***************************************************************************** + * + * Function: do_express_test() + * + * Purpose: Do an MPI_Allreduce to obtain the maximum value returned + * by GetTestExpress() across all processes. Return this + * value. + * + * Envirmoment variables can be different across different + * processes. This function ensures that all processes agree + * on whether to do an express test. + * + * Return: Success: Maximum of the values returned by + * GetTestExpress() across all processes. 
+ * + * Failure: -1 + * + * Programmer: JRM -- 4/25/06 + * + *****************************************************************************/ +static int +do_express_test(int world_mpi_rank) +{ + int express_test; + int max_express_test; + int result; + + express_test = GetTestExpress(); + + result = MPI_Allreduce((void *)&express_test, + (void *)&max_express_test, + 1, + MPI_INT, + MPI_MAX, + MPI_COMM_WORLD); + + if ( result != MPI_SUCCESS ) { + nerrors++; + max_express_test = -1; + if ( VERBOSE_MED && (world_mpi_rank == 0)) { + HDfprintf(stdout, "%d:%s: MPI_Allreduce() failed.\n", + world_mpi_rank, FUNC ); + } + } + + return(max_express_test); + +} /* do_express_test() */ int main(int argc, char **argv) { - + int ExpressMode = 0; hsize_t newsize = 1048576; hsize_t oldsize = H5S_mpio_set_bigio_count(newsize); + if (newsize != oldsize) { bigcount = newsize * 2; } @@ -2075,20 +2124,27 @@ int main(int argc, char **argv) MPI_Comm_size(MPI_COMM_WORLD,&mpi_size); MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + ExpressMode = do_express_test(mpi_rank); + dataset_big_write(); MPI_Barrier(MPI_COMM_WORLD); dataset_big_read(); MPI_Barrier(MPI_COMM_WORLD); - oldsize = H5S_mpio_set_bigio_count(16384); + if (ExpressMode > 1) { + printf("***Express test mode on. Several tests are skipped\n"); + } + else { + coll_chunk1(); + MPI_Barrier(MPI_COMM_WORLD); + coll_chunk2(); + MPI_Barrier(MPI_COMM_WORLD); + coll_chunk3(); + } - coll_chunk1(); - MPI_Barrier(MPI_COMM_WORLD); - coll_chunk2(); - MPI_Barrier(MPI_COMM_WORLD); - coll_chunk3(); - MPI_Barrier(MPI_COMM_WORLD); + /* close HDF5 library */ + H5close(); MPI_Finalize(); -- cgit v0.12 From 32b0d6ca9f95fe46c2e52b58cabd4f5af4c107d3 Mon Sep 17 00:00:00 2001 From: Richard Warren Date: Mon, 10 Jul 2017 18:20:39 -0400 Subject: Fix up the ExpressMode check for skipping slow running tests. --- testpar/t_bigio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testpar/t_bigio.c b/testpar/t_bigio.c index 830ea54..a4a1323 100644 --- a/testpar/t_bigio.c +++ b/testpar/t_bigio.c @@ -2132,7 +2132,7 @@ int main(int argc, char **argv) dataset_big_read(); MPI_Barrier(MPI_COMM_WORLD); - if (ExpressMode > 1) { + if (ExpressMode > 0) { printf("***Express test mode on. Several tests are skipped\n"); } else { -- cgit v0.12 From 6a5aa46e936340ed540359290374fa909f9213a6 Mon Sep 17 00:00:00 2001 From: Richard Warren Date: Thu, 13 Jul 2017 10:12:08 -0400 Subject: Added a brief outline for Large MPI-IO transfers into RELEASE.txt --- release_docs/RELEASE.txt | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 20d58b3..4335b37 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -62,7 +62,29 @@ New Features Parallel Library: ----------------- - - + - Large MPI-IO transfers + + Previous releases of PHDF5 would fail when attempting to + read or write greater than 2GB of data in a single IO operation. + This issue stems principally from an MPI API whose definitions + utilize 32 bit integers to describe the number of data elements + and datatype that MPI should use to effect a data transfer. + Historically, HDF5 has invoked MPI-IO with the number of + elements in a contiguous buffer represented as the length + of that buffer in bytes. + + Resolving the issue and thus enabling larger MPI-IO transfers + is accomplished first, by detecting when a user IO request would + exceed the 2GB limit as described above. 
Once a transfer request + is identified as requiring special handling, PHDF5 now creates a + derived datatype consisting of a vector of fixed-size blocks, + which is in turn wrapped in a single datatype created with + MPI_Type_create_struct to contain the vector and any remaining data. + The newly created datatype is then used in place of MPI_BYTE, + allowing the original user request to be fulfilled without + encountering API errors. + + (RAW – 2017/07/11, HDFFV-8839) Fortran Library: ---------------- -- cgit v0.12
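Editor's note: to make the approach described in the RELEASE.txt entry (and implemented by H5S_mpio_create_large_type in the second patch) concrete, here is a minimal, hypothetical sketch of how a contiguous transfer larger than 2GB can be expressed as a single MPI datatype. The names build_large_contig_type, max_count, and total_bytes are illustrative only and are not part of the HDF5 source; error handling and the small-transfer fast path are omitted, and num_full is assumed to fit in an int.

```c
#include <mpi.h>

/* Sketch: describe total_bytes of contiguous data with one MPI datatype,
 * even when total_bytes exceeds what a 32-bit int can hold, by chaining
 * full-size contiguous pieces plus a leftover piece in a struct type.
 * max_count plays the role of bigio_count in the patch, e.g. (2^29)-1. */
static int build_large_contig_type(MPI_Offset total_bytes,
                                   MPI_Aint max_count,
                                   MPI_Datatype *new_type)
{
    MPI_Offset   num_full = total_bytes / max_count;          /* full pieces */
    int          leftover = (int)(total_bytes - num_full * max_count);
    MPI_Datatype full_piece, body, tail;
    MPI_Datatype types[2];
    int          blocklens[2] = {1, 1};
    MPI_Aint     disps[2];

    /* one piece of max_count bytes, then num_full of those pieces back to back */
    MPI_Type_contiguous((int)max_count, MPI_BYTE, &full_piece);
    MPI_Type_contiguous((int)num_full, full_piece, &body);
    MPI_Type_free(&full_piece);

    if (leftover == 0) {
        /* the body already covers the whole buffer */
        *new_type = body;
    }
    else {
        /* leftover bytes placed immediately after the full pieces */
        MPI_Type_contiguous(leftover, MPI_BYTE, &tail);
        types[0] = body;
        types[1] = tail;
        disps[0] = 0;
        disps[1] = (MPI_Aint)num_full * max_count;
        MPI_Type_create_struct(2, blocklens, disps, types, new_type);
        MPI_Type_free(&body);
        MPI_Type_free(&tail);
    }

    return MPI_Type_commit(new_type);
}
```

A caller would then pass a count of 1 of this committed datatype to the MPI I/O routine instead of N counts of MPI_BYTE, which is the substitution the release note describes; the real code additionally handles strided (hvector) layouts and non-byte element types.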