/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5. The full HDF5 copyright notice, including      *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Example of using the parallel HDF5 library to collectively write to
 * datasets with filters applied to them when one or more MPI ranks do
 * not have data to contribute to the dataset.
 *
 * If the HDF5_NOCLEANUP environment variable is set, the file that
 * this example creates will not be removed as the example finishes.
 *
 * A parallel file prefix may be needed because, in general, the current
 * working directory in which compilation is done is not suitable for
 * parallel I/O, and there is no standard pathname for parallel file
 * systems. In some cases, the parallel file name may even need a
 * parallel file type prefix such as "pfs:/GF/...". Therefore, this
 * example parses the HDF5_PARAPREFIX environment variable for a prefix,
 * if one is needed.
 */

#include <stdlib.h>

#include "hdf5.h"

#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES)

#define EXAMPLE_FILE      "ph5_filtered_writes_no_sel.h5"
#define EXAMPLE_DSET_NAME "DSET"

#define EXAMPLE_DSET_DIMS           2
#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10

/* Dataset datatype */
#define HDF5_DATATYPE H5T_NATIVE_INT
typedef int C_DATATYPE;

#ifndef PATH_MAX
#define PATH_MAX 512
#endif

/* Global variables */
int mpi_rank, mpi_size;

/*
 * Routine to set an HDF5 filter on the given DCPL
 */
static void
set_filter(hid_t dcpl_id)
{
    htri_t filter_avail;

    /*
     * Check if 'deflate' filter is available
     */
    filter_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE);
    if (filter_avail < 0)
        return;
    else if (filter_avail) {
        /*
         * Set 'deflate' filter with reasonable
         * compression level on DCPL
         */
        H5Pset_deflate(dcpl_id, 6);
    }
    else {
        /*
         * Set Fletcher32 checksum filter on DCPL
         * since it is always available in HDF5
         */
        H5Pset_fletcher32(dcpl_id);
    }
}

/*
 * Routine to fill a data buffer with data. Assumes
 * the dimension rank is 2 and data is stored contiguously.
 */
void
fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data)
{
    C_DATATYPE *dataptr = data;
    hsize_t     i, j;

    /* Use MPI rank value for data */
    for (i = 0; i < count[0]; i++) {
        for (j = 0; j < count[1]; j++) {
            *dataptr++ = mpi_rank;
        }
    }
}

/* Cleanup created file */
static void
cleanup(char *filename)
{
    hbool_t do_cleanup = getenv("HDF5_NOCLEANUP") ? 0 : 1;

    if (do_cleanup)
        MPI_File_delete(filename, MPI_INFO_NULL);
}
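/*
 * Illustrative sketch, not part of the original example and not called by
 * it: a small diagnostic that lists the filters present in a DCPL's filter
 * pipeline, e.g. to confirm whether set_filter() used deflate or fell back
 * to Fletcher32. H5Pget_nfilters() and H5Pget_filter2() are standard HDF5
 * property-list queries; rank 0 could call this on dcpl_id right after
 * set_filter() if desired.
 */
static void
print_filter_pipeline(hid_t dcpl_id)
{
    int nfilters = H5Pget_nfilters(dcpl_id);
    int i;

    for (i = 0; i < nfilters; i++) {
        char         name[64];
        unsigned int flags         = 0;
        size_t       cd_nelmts     = 0; /* no client data values requested */
        unsigned     filter_config = 0;
        H5Z_filter_t filter_id;

        /* Query the i-th filter in the pipeline by index */
        filter_id = H5Pget_filter2(dcpl_id, (unsigned)i, &flags, &cd_nelmts, NULL, sizeof(name), name,
                                   &filter_config);
        if (filter_id >= 0)
            printf("Filter %d: '%s' (ID %d)\n", i, name, (int)filter_id);
    }
}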
/*
 * Routine to write to a dataset in a fashion
 * where no chunks in the dataset are written
 * to by more than 1 MPI rank. This will
 * generally give the best performance as the
 * MPI ranks will need the least amount of
 * inter-process communication.
 */
static void
write_dataset_some_no_sel(hid_t file_id, hid_t dxpl_id)
{
    C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE];
    hsize_t    dataset_dims[EXAMPLE_DSET_DIMS];
    hsize_t    sel_dims[1];
    hsize_t    chunk_dims[EXAMPLE_DSET_DIMS];
    hsize_t    start[EXAMPLE_DSET_DIMS];
    hsize_t    stride[EXAMPLE_DSET_DIMS];
    hsize_t    count[EXAMPLE_DSET_DIMS];
    hbool_t    no_selection;
    hid_t      dset_id        = H5I_INVALID_HID;
    hid_t      dcpl_id        = H5I_INVALID_HID;
    hid_t      file_dataspace = H5I_INVALID_HID;
    hid_t      mem_dataspace  = H5I_INVALID_HID;

    /*
     * ------------------------------------
     * Setup Dataset Creation Property List
     * ------------------------------------
     */

    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);

    /*
     * REQUIRED: Dataset chunking must be enabled to
     *           apply a data filter to the dataset.
     *           Chunks in the dataset are of size
     *           EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE.
     */
    chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);

    /* Set filter to be applied to created datasets */
    set_filter(dcpl_id);

    /*
     * ------------------------------------
     * Define the dimensions of the dataset
     * and create it
     * ------------------------------------
     */

    /*
     * Create a dataset composed of 4 chunks
     * per MPI rank. The first dataset dimension
     * scales according to the number of MPI ranks.
     * The second dataset dimension stays fixed
     * according to the chunk size.
     */
    dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size;
    dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;

    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);

    /* Create the dataset */
    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
                         H5P_DEFAULT);

    /*
     * ------------------------------------
     * Setup selection in the dataset for
     * each MPI rank
     * ------------------------------------
     */

    /*
     * Odd rank value MPI ranks do not
     * contribute any data to the dataset.
     */
    no_selection = (mpi_rank % 2) == 1;

    if (no_selection) {
        /*
         * MPI ranks not contributing data to
         * the dataset should call H5Sselect_none
         * on the file dataspace that will be
         * passed to H5Dwrite.
         */
        H5Sselect_none(file_dataspace);
    }
    else {
        /*
         * Even MPI ranks contribute data to
         * the dataset. Each MPI rank's selection
         * covers a single chunk in the first dataset
         * dimension. Each MPI rank's selection
         * covers 4 chunks in the second dataset
         * dimension. This leads to each contributing
         * MPI rank writing to 4 chunks of the dataset.
         */
        start[0]  = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE;
        start[1]  = 0;
        stride[0] = 1;
        stride[1] = 1;
        count[0]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;
        count[1]  = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;

        H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);

        /*
         * --------------------------------------
         * Fill data buffer with MPI rank's rank
         * value to make it easy to see which
         * part of the dataset each rank wrote to
         * --------------------------------------
         */
        fill_databuf(start, count, stride, &data[0][0]);

        /*
         * ---------------------------------
         * Create 1-D memory dataspace
         * ---------------------------------
         */
        sel_dims[0]   = count[0] * count[1];
        mem_dataspace = H5Screate_simple(1, sel_dims, NULL);
    }

    /*
     * ---------------------------------
     * Write to the dataset collectively
     * ---------------------------------
     */
    H5Dwrite(dset_id, HDF5_DATATYPE, no_selection ? H5S_ALL : mem_dataspace, file_dataspace, dxpl_id,
             data);
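    /*
     * Illustrative sketch, not part of the original example: after the
     * collective write, the transfer property list can be queried to
     * confirm that collective I/O actually took place, since HDF5 can
     * fall back to independent I/O in some situations.
     * H5Pget_mpio_no_collective_cause() reports, per rank and globally,
     * why collective I/O was broken; H5D_MPIO_COLLECTIVE means it was
     * not broken.
     */
    {
        uint32_t local_cause  = 0;
        uint32_t global_cause = 0;

        if (H5Pget_mpio_no_collective_cause(dxpl_id, &local_cause, &global_cause) >= 0 &&
            global_cause != H5D_MPIO_COLLECTIVE)
            printf("Rank %d: collective write was broken (global cause: 0x%x)\n", mpi_rank,
                   (unsigned)global_cause);
    }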
    /*
     * --------------
     * Close HDF5 IDs
     * --------------
     */

    /* Ranks with no selection never created a memory dataspace */
    if (mem_dataspace != H5I_INVALID_HID)
        H5Sclose(mem_dataspace);
    H5Sclose(file_dataspace);
    H5Pclose(dcpl_id);
    H5Dclose(dset_id);
}

int
main(int argc, char **argv)
{
    MPI_Comm comm    = MPI_COMM_WORLD;
    MPI_Info info    = MPI_INFO_NULL;
    hid_t    file_id = H5I_INVALID_HID;
    hid_t    fapl_id = H5I_INVALID_HID;
    hid_t    dxpl_id = H5I_INVALID_HID;
    char    *par_prefix = NULL;
    char     filename[PATH_MAX];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(comm, &mpi_size);
    MPI_Comm_rank(comm, &mpi_rank);

    /*
     * ----------------------------------
     * Start parallel access to HDF5 file
     * ----------------------------------
     */

    /* Setup File Access Property List with parallel I/O access */
    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl_id, comm, info);

    /*
     * OPTIONAL: Set collective metadata reads on FAPL to allow
     *           parallel writes to filtered datasets to perform
     *           better at scale. While not strictly necessary,
     *           this is generally recommended.
     */
    H5Pset_all_coll_metadata_ops(fapl_id, true);

    /*
     * OPTIONAL: Set the latest file format version for HDF5 in
     *           order to gain access to different dataset chunk
     *           index types and better data encoding methods.
     *           While not strictly necessary, this is generally
     *           recommended.
     */
    H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST);

    /* Parse any parallel prefix and create filename */
    par_prefix = getenv("HDF5_PARAPREFIX");

    snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "",
             EXAMPLE_FILE);

    /* Create HDF5 file */
    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);

    /*
     * --------------------------------------
     * Setup Dataset Transfer Property List
     * with collective I/O
     * --------------------------------------
     */

    dxpl_id = H5Pcreate(H5P_DATASET_XFER);

    /*
     * REQUIRED: Setup collective I/O for the dataset
     *           write operations. Parallel writes to
     *           filtered datasets MUST be collective,
     *           even if some ranks have no data to
     *           contribute to the write operation.
     */
    H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);

    /*
     * --------------------------------
     * Create and write to the dataset
     * --------------------------------
     */

    /*
     * Write to a dataset in a fashion where no
     * chunks in the dataset are written to by
     * more than 1 MPI rank and some MPI ranks
     * have nothing to contribute to the dataset.
     * In this case, the MPI ranks that have no
     * data to contribute must still participate
     * in the collective H5Dwrite call, but should
     * call H5Sselect_none on the file dataspace
     * passed to the H5Dwrite call.
     */
    write_dataset_some_no_sel(file_id, dxpl_id);

    /*
     * ------------------
     * Close all HDF5 IDs
     * ------------------
     */

    H5Pclose(dxpl_id);
    H5Pclose(fapl_id);
    H5Fclose(file_id);

    printf("PHDF5 example finished with no errors\n");

    /*
     * ------------------------------------
     * Cleanup created HDF5 file and finish
     * ------------------------------------
     */
    cleanup(filename);

    MPI_Finalize();

    return 0;
}

#else

int
main(void)
{
    printf("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n");
    return 0;
}

#endif
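/*
 * Example build and run (assumed source file name and launcher; adjust for
 * your MPI/HDF5 installation). h5pcc is the parallel HDF5 compiler wrapper:
 *
 *   h5pcc -o ph5_filtered_writes_no_sel ph5_filtered_writes_no_sel.c
 *   mpiexec -n 4 ./ph5_filtered_writes_no_sel
 *
 * With 4 ranks, ranks 0 and 2 each write their rank value to 4 chunks of
 * the dataset, while ranks 1 and 3 participate in the collective write
 * with an empty selection.
 */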