/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

/*
 * Example of using the parallel HDF5 library to write to datasets
 * with filters applied to them.
 *
 * If the HDF5_NOCLEANUP environment variable is set, the file that
 * this example creates will not be removed as the example finishes.
 *
 * A parallel file prefix is needed because, in general, the current
 * working directory in which compilation is done is not suitable for
 * parallel I/O, and there is no standard pathname for parallel file
 * systems. In some cases, the parallel file name may even need a
 * parallel file system type prefix such as: "pfs:/GF/...".  Therefore,
 * this example parses the HDF5_PARAPREFIX environment variable for a
 * prefix, if one is needed.
 */
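
/*
 * A typical way to build and run this example, assuming the h5pcc
 * compiler wrapper and an MPI launcher are available (names and flags
 * vary by installation; shown for illustration only):
 *
 *     h5pcc ph5_filtered_writes.c -o ph5_filtered_writes
 *     mpiexec -n 4 ./ph5_filtered_writes
 */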

#include <stdio.h>
#include <stdlib.h>

#include "hdf5.h"

#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES)

#define EXAMPLE_FILE       "ph5_filtered_writes.h5"
#define EXAMPLE_DSET1_NAME "DSET1"
#define EXAMPLE_DSET2_NAME "DSET2"

#define EXAMPLE_DSET_DIMS           2
#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10

/* Dataset datatype */
#define HDF5_DATATYPE H5T_NATIVE_INT
typedef int C_DATATYPE;

#ifndef PATH_MAX
#define PATH_MAX 512
#endif

/* Global variables */
int mpi_rank, mpi_size;

/*
 * Routine to set an HDF5 filter on the given DCPL
 */
static void
set_filter(hid_t dcpl_id)
{
    htri_t filter_avail;

    /*
     * Check if 'deflate' filter is available
     */
    filter_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE);
    if (filter_avail < 0)
        return;
    else if (filter_avail) {
        /*
         * Set 'deflate' filter with reasonable
         * compression level on DCPL
         */
        H5Pset_deflate(dcpl_id, 6);
    }
    else {
        /*
         * Set Fletcher32 checksum filter on DCPL
         * since it is always available in HDF5
         */
        H5Pset_fletcher32(dcpl_id);
    }
}
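
/*
 * Note: the filter pipeline that set_filter() configured on a DCPL can
 * be inspected afterwards; a minimal sketch (not called by this
 * example):
 *
 *     unsigned     flags;
 *     size_t       cd_nelmts = 0;
 *     H5Z_filter_t ftype     = H5Pget_filter2(dcpl_id, 0, &flags, &cd_nelmts,
 *                                             NULL, 0, NULL, NULL);
 *
 * where ftype comes back as H5Z_FILTER_DEFLATE or H5Z_FILTER_FLETCHER32,
 * depending on which filter was available.
 */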

/*
 * Routine to fill a data buffer with data. Assumes
 * the dataset's rank is 2 and the data is stored
 * contiguously in the buffer.
 */
static void
fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data)
{
    C_DATATYPE *dataptr = data;
    hsize_t     i, j;

    /* start/stride are unused by this simple contiguous fill;
     * they are kept so the signature mirrors the hyperslab
     * selection parameters */
    (void)start;
    (void)stride;

    /* Use MPI rank value for data */
    for (i = 0; i < count[0]; i++) {
        for (j = 0; j < count[1]; j++) {
            *dataptr++ = mpi_rank;
        }
    }
}
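
/*
 * For example, with count = {10, 40} (one chunk row of DSET1 below),
 * rank 1 produces a contiguous 10 x 40 block in which every element
 * is 1.
 */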

/* Cleanup created file */
static void
cleanup(char *filename)
{
    hbool_t do_cleanup = getenv("HDF5_NOCLEANUP") ? 0 : 1;

    if (do_cleanup)
        MPI_File_delete(filename, MPI_INFO_NULL);
}

/*
 * Routine to write to a dataset in a fashion
 * where no chunk in the dataset is written
 * to by more than one MPI rank. This will
 * generally give the best performance, as the
 * MPI ranks require the least inter-process
 * communication.
 */
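/*
 * For example, with 2 MPI ranks, DSET1 is a 20 x 40 dataset made of a
 * 2 x 4 grid of 10 x 10 chunks, and each rank owns one chunk row
 * (illustration only):
 *
 *     chunk row 0: [rank 0][rank 0][rank 0][rank 0]
 *     chunk row 1: [rank 1][rank 1][rank 1][rank 1]
 */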
static void
write_dataset_no_overlap(hid_t file_id, hid_t dxpl_id)
{
    C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE];
    hsize_t    dataset_dims[EXAMPLE_DSET_DIMS];
    hsize_t    chunk_dims[EXAMPLE_DSET_DIMS];
    hsize_t    start[EXAMPLE_DSET_DIMS];
    hsize_t    stride[EXAMPLE_DSET_DIMS];
    hsize_t    count[EXAMPLE_DSET_DIMS];
    hid_t      dset_id        = H5I_INVALID_HID;
    hid_t      dcpl_id        = H5I_INVALID_HID;
    hid_t      file_dataspace = H5I_INVALID_HID;

    /*
     * ------------------------------------
     * Setup Dataset Creation Property List
     * ------------------------------------
     */

    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);

    /*
     * REQUIRED: Dataset chunking must be enabled to
     *           apply a data filter to the dataset.
     *           Chunks in the dataset are of size
     *           EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE.
     */
    chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);

    /* Set filter to be applied to created datasets */
    set_filter(dcpl_id);

    /*
     * ------------------------------------
     * Define the dimensions of the dataset
     * and create it
     * ------------------------------------
     */

    /*
     * Create a dataset composed of 4 chunks
     * per MPI rank. The first dataset dimension
     * scales according to the number of MPI ranks.
     * The second dataset dimension stays fixed
     * according to the chunk size.
     */
    dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size;
    dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;

    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);

    /* Create the dataset */
    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET1_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
                         H5P_DEFAULT);

    /*
     * ------------------------------------
     * Setup selection in the dataset for
     * each MPI rank
     * ------------------------------------
     */

    /*
     * Each MPI rank's selection covers a
     * single chunk in the first dataset
     * dimension. Each MPI rank's selection
     * covers 4 chunks in the second dataset
     * dimension. This leads to each MPI rank
     * writing to 4 chunks of the dataset.
     */
    start[0]  = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE;
    start[1]  = 0;
    stride[0] = 1;
    stride[1] = 1;
    count[0]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    count[1]  = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;

    H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);

    /*
     * --------------------------------------
     * Fill the data buffer with this rank's
     * value to make it easy to see which
     * part of the dataset each rank wrote to
     * --------------------------------------
     */

    fill_databuf(start, count, stride, &data[0][0]);

    /*
     * ---------------------------------
     * Write to the dataset collectively
     * ---------------------------------
     */

    H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data);

    /*
     * --------------
     * Close HDF5 IDs
     * --------------
     */

    H5Sclose(file_dataspace);
    H5Pclose(dcpl_id);
    H5Dclose(dset_id);
}

/*
 * Routine to write to a dataset in a fashion
 * where every chunk in the dataset is written
 * to by every MPI rank. This will generally
 * give the worst performance, as the MPI ranks
 * require the most inter-process communication.
 */
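/*
 * For example, with 2 MPI ranks, DSET2 is a 4 x 10 dataset made of two
 * 2 x 10 chunks, and the ranks interleave row by row so that every
 * chunk receives data from every rank (illustration only):
 *
 *     chunk 0: row 0 <- rank 0
 *              row 1 <- rank 1
 *     chunk 1: row 2 <- rank 0
 *              row 3 <- rank 1
 */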
static void
write_dataset_overlap(hid_t file_id, hid_t dxpl_id)
{
    C_DATATYPE *data = NULL;
    hsize_t     dataset_dims[EXAMPLE_DSET_DIMS];
    hsize_t     chunk_dims[EXAMPLE_DSET_DIMS];
    hsize_t     start[EXAMPLE_DSET_DIMS];
    hsize_t     stride[EXAMPLE_DSET_DIMS];
    hsize_t     count[EXAMPLE_DSET_DIMS];
    hid_t       dset_id        = H5I_INVALID_HID;
    hid_t       dcpl_id        = H5I_INVALID_HID;
    hid_t       file_dataspace = H5I_INVALID_HID;

    /*
     * ------------------------------------
     * Setup Dataset Creation Property List
     * ------------------------------------
     */

    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);

    /*
     * REQUIRED: Dataset chunking must be enabled to
     *           apply a data filter to the dataset.
     *           Chunks in the dataset are of size
     *           mpi_size x EXAMPLE_DSET_CHUNK_DIM_SIZE.
     */
    chunk_dims[0] = mpi_size;
    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);

    /* Set filter to be applied to created datasets */
    set_filter(dcpl_id);

    /*
     * ------------------------------------
     * Define the dimensions of the dataset
     * and create it
     * ------------------------------------
     */

    /*
     * Create a dataset composed of N chunks,
     * where N is the number of MPI ranks. The
     * first dataset dimension scales according
     * to the number of MPI ranks. The second
     * dataset dimension stays fixed according
     * to the chunk size.
     */
    dataset_dims[0] = mpi_size * chunk_dims[0];
    dataset_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;

    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);

    /* Create the dataset */
    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET2_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
                         H5P_DEFAULT);

    /*
     * ------------------------------------
     * Setup selection in the dataset for
     * each MPI rank
     * ------------------------------------
     */

    /*
     * Each MPI rank's selection covers
     * part of every chunk in the first
     * dimension. Each MPI rank's selection
     * covers all of every chunk in the
     * second dimension. This leads to
     * each MPI rank writing an equal
     * amount of data to every chunk
     * in the dataset.
     */
    start[0]  = mpi_rank;
    start[1]  = 0;
    stride[0] = chunk_dims[0];
    stride[1] = 1;
    count[0]  = mpi_size;
    count[1]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;

    H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);

    /*
     * --------------------------------------
     * Fill the data buffer with this rank's
     * value to make it easy to see which
     * part of the dataset each rank wrote to
     * --------------------------------------
     */

    data = malloc(mpi_size * EXAMPLE_DSET_CHUNK_DIM_SIZE * sizeof(C_DATATYPE));

    fill_databuf(start, count, stride, data);

    /*
     * ---------------------------------
     * Write to the dataset collectively
     * ---------------------------------
     */

    H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data);

    free(data);

    /*
     * --------------
     * Close HDF5 IDs
     * --------------
     */

    H5Sclose(file_dataspace);
    H5Pclose(dcpl_id);
    H5Dclose(dset_id);
}

int
main(int argc, char **argv)
{
    MPI_Comm comm       = MPI_COMM_WORLD;
    MPI_Info info       = MPI_INFO_NULL;
    hid_t    file_id    = H5I_INVALID_HID;
    hid_t    fapl_id    = H5I_INVALID_HID;
    hid_t    dxpl_id    = H5I_INVALID_HID;
    char    *par_prefix = NULL;
    char     filename[PATH_MAX];

    MPI_Init(&argc, &argv);
    MPI_Comm_size(comm, &mpi_size);
    MPI_Comm_rank(comm, &mpi_rank);

    /*
     * ----------------------------------
     * Start parallel access to HDF5 file
     * ----------------------------------
     */

    /* Setup File Access Property List with parallel I/O access */
    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl_id, comm, info);

    /*
     * OPTIONAL: It is generally recommended to set collective
     *           metadata reads on FAPL to perform metadata reads
     *           collectively, which usually allows filtered datasets
     *           to perform better at scale, although it is not
     *           strictly necessary.
     */
    H5Pset_all_coll_metadata_ops(fapl_id, true);

    /*
     * OPTIONAL: It is generally recommended to set collective
     *           metadata writes on FAPL to perform metadata writes
     *           collectively, which usually allows filtered datasets
     *           to perform better at scale, although it is not
     *           strictly necessary.
     */
    H5Pset_coll_metadata_write(fapl_id, true);

    /*
     * OPTIONAL: Set the latest file format version for HDF5 in
     *           order to gain access to different dataset chunk
     *           index types and better data encoding methods.
     *           While not strictly necessary, this is generally
     *           recommended.
     */
    H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST);

    /* Parse any parallel prefix and create filename */
    par_prefix = getenv("HDF5_PARAPREFIX");

    snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "", EXAMPLE_FILE);

    /* Create HDF5 file */
    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);

    /*
     * --------------------------------------
     * Setup Dataset Transfer Property List
     * with collective I/O
     * --------------------------------------
     */

    dxpl_id = H5Pcreate(H5P_DATASET_XFER);

    /*
     * REQUIRED: Setup collective I/O for the dataset
     *           write operations. Parallel writes to
     *           filtered datasets MUST be collective,
     *           even if some ranks have no data to
     *           contribute to the write operation.
     *
     *           Refer to the 'ph5_filtered_writes_no_sel'
     *           example to see how to setup a dataset
     *           write when one or more MPI ranks have
     *           no data to contribute to the write
     *           operation.
     */
    H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
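
    /*
     * Note: after a write completes, the I/O method the library
     * actually used can be verified by querying the DXPL; a minimal
     * sketch (not used by this example):
     *
     *     H5D_mpio_actual_io_mode_t io_mode;
     *     H5Pget_mpio_actual_io_mode(dxpl_id, &io_mode);
     */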

    /*
     * --------------------------------
     * Create and write to each dataset
     * --------------------------------
     */

    /*
     * Write to a dataset in a fashion where no
     * chunk in the dataset is written to by
     * more than one MPI rank. This will generally
     * give the best performance, as the MPI ranks
     * require the least inter-process communication.
     */
    write_dataset_no_overlap(file_id, dxpl_id);

    /*
     * Write to a dataset in a fashion where
     * every chunk in the dataset is written
     * to by every MPI rank. This will generally
     * give the worst performance, as the MPI ranks
     * require the most inter-process communication.
     */
    write_dataset_overlap(file_id, dxpl_id);

    /*
     * ------------------
     * Close all HDF5 IDs
     * ------------------
     */

    H5Pclose(dxpl_id);
    H5Pclose(fapl_id);
    H5Fclose(file_id);

    printf("PHDF5 example finished with no errors\n");

    /*
     * ------------------------------------
     * Cleanup created HDF5 file and finish
     * ------------------------------------
     */

    cleanup(filename);

    MPI_Finalize();

    return 0;
}

#else

int
main(void)
{
    printf("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n");
    return 0;
}

#endif