summaryrefslogtreecommitdiffstats
path: root/tools/src/h5repack/h5repack.h
blob: 0838c214aea69eb04c050b9b6ff7f3aaac308f66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * Copyright by The HDF Group.                                               *
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the COPYING file, which can be found at the root of the source code       *
 * distribution tree, or in https://www.hdfgroup.org/licenses.               *
 * If you do not have access to either file, you may request a copy from     *
 * help@hdfgroup.org.                                                        *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#ifndef H5REPACK_H
#define H5REPACK_H

#include "H5private.h"
#include "hdf5.h"
#include "h5trav.h"

#define H5FOPENERROR      "unable to open file"
#define PFORMAT           "%-7s %-7s %-7s\n" /* chunk info, compression info, name*/
#define PFORMAT1          "%-7s %-7s %-7s"   /* chunk info, compression info, name*/
#define MAX_NC_NAME       256                /* max length of a name */
#define MAX_VAR_DIMS      32                 /* max per variable dimensions */
#define FORMAT_OBJ        " %-27s %s\n"      /* obj type, name */
#define FORMAT_OBJ_ATTR   "  %-27s %s\n"     /* obj type, name */
#define MAX_COMPACT_DSIZE 64512              /* max data size for compact layout. -1k for header size */

/* File space default information */
#define FS_PAGESIZE_DEF  4096
#define FS_STRATEGY_DEF  H5F_FSPACE_STRATEGY_FSM_AGGR
#define FS_PERSIST_DEF   FALSE
#define FS_THRESHOLD_DEF 1

/*-------------------------------------------------------------------------
 * data structures for command line options
 *-------------------------------------------------------------------------
 */

/* a list of names */
typedef struct {
    char obj[MAX_NC_NAME];
} obj_list_t;

/*
 the type of filter and additional parameter
 type can be one of the filters
 H5Z_FILTER_NONE        0,  uncompress if compressed
 H5Z_FILTER_DEFLATE     1 , deflation like gzip
 H5Z_FILTER_SHUFFLE     2 , shuffle the data
 H5Z_FILTER_FLETCHER32  3 , letcher32 checksum of EDC
 H5Z_FILTER_SZIP        4 , szip compression
 H5Z_FILTER_NBIT        5 , nbit compression
 H5Z_FILTER_SCALEOFFSET 6 , scaleoffset compression
*/

#define CD_VALUES 20

typedef struct {
    H5Z_filter_t filtn;                /* filter identification number */
    unsigned     filt_flag;            /* filter definition flag */
    unsigned     cd_values[CD_VALUES]; /* filter client data values */
    size_t       cd_nelmts;            /* filter client number of values */
} filter_info_t;

/* chunk lengths along each dimension and rank */
typedef struct {
    hsize_t chunk_lengths[MAX_VAR_DIMS];
    int     rank;
} chunk_info_t;

/* we currently define a maximum value for the filters array,
   that corresponds to the current library filters */
#define H5_REPACK_MAX_NFILTERS 6

/* information for one object, contains PATH, CHUNK info and FILTER info */
typedef struct {
    char          path[MAX_NC_NAME];              /* name of object */
    filter_info_t filter[H5_REPACK_MAX_NFILTERS]; /* filter array */
    int           nfilters;                       /* current number of filters */
    H5D_layout_t  layout;                         /* layout information */
    chunk_info_t  chunk;                          /* chunk information */
    hid_t         refobj_id;                      /* object ID, references */
} pack_info_t;

/* store a table of all objects */
typedef struct {
    unsigned int size;
    unsigned int nelems;
    pack_info_t *objs;
} pack_opttbl_t;

/*-------------------------------------------------------------------------
 * command line options
 *-------------------------------------------------------------------------
 */

/* all the above, ready to go to the hrepack call */
typedef struct {
    pack_opttbl_t *op_tbl;                           /* table with all -c and -f options */
    int            all_layout;                       /* apply the layout to all objects */
    int            all_filter;                       /* apply the filter to all objects */
    filter_info_t  filter_g[H5_REPACK_MAX_NFILTERS]; /*global filter array for the ALL case */
    int            n_filter_g;                       /* number of global filters */
    chunk_info_t   chunk_g;                          /* global chunk INFO for the ALL case */
    H5D_layout_t   layout_g;                         /* global layout information for the ALL case */
    int            verbose;                          /* verbose mode */
    hbool_t        merge;                            /* Merge external file. */
    hbool_t        prune;                            /* Don't follow external file. */
    hsize_t        min_comp;                         /* minimum size to compress, in bytes */
    int            use_native;                       /* use a native type in write */
    hbool_t        latest;                           /* pack file with the latest file format */
    H5F_libver_t   low_bound;                        /* The file's low bound as in H5Fset_libver_bounds() */
    H5F_libver_t   high_bound;                       /* The file's high bound as in H5Fset_libver_bounds() */
    hid_t          fin_fapl;                         /* FAPL to use for opening the input file */
    hid_t          fout_fapl;                        /* FAPL to use for opening/creating the output file */
    int            grp_compact; /* Set the maximum number of links to store as header messages in the group */
    int            grp_indexed; /* Set the minimum number of links to store in the indexed format */
    int            msg_size[8]; /* Minimum size of shared messages: dataspace,
                                   datatype, fill value, filter pipleline, attribute */
    const char *          ublock_filename; /* user block file name */
    hsize_t               ublock_size;     /* user block size */
    hsize_t               meta_block_size; /* metadata aggregation block size (for H5Pset_meta_block_size) */
    hsize_t               threshold;       /* alignment threshold for H5Pset_alignment */
    hsize_t               alignment;       /* alignment for H5Pset_alignment */
    H5F_fspace_strategy_t fs_strategy;     /* File space handling strategy */
    int                   fs_persist;      /* Free space section threshold */
    long                  fs_threshold;    /* Free space section threshold */
    long long             fs_pagesize;     /* File space page size */
} pack_opt_t;

typedef struct named_dt_t {
    H5O_token_t        obj_token; /* Object token for the named dtype in the in file */
    hid_t              id_out;    /* Open identifier for the dtype in the out file */
    struct named_dt_t *next;      /* Next dtype */
} named_dt_t;

/*-------------------------------------------------------------------------
 * public functions
 *-------------------------------------------------------------------------
 */

#ifdef __cplusplus
extern "C" {
#endif

int h5repack(const char *infile, const char *outfile, pack_opt_t *options);
int h5repack_addfilter(const char *str, pack_opt_t *options);
int h5repack_addlayout(const char *str, pack_opt_t *options);
int h5repack_init(pack_opt_t *options, int verbose, hbool_t latest);
int h5repack_end(pack_opt_t *options);
int h5repack_verify(const char *in_fname, const char *out_fname, pack_opt_t *options);
int h5repack_cmp_pl(const char *fname1, hid_t fname1_fapl, const char *fname2, hid_t fname2_fapl);

/* Note: The below copy_named_datatype(), named_datatype_free(), copy_attr()
 * and struct named_dt_t were located in h5repack_copy.c as static prior to
 * bugfix1726.
 * Made shared functions as copy_attr() was needed in h5repack_refs.c.
 * However copy_attr() may be obsoleted when H5Acopy is available and put back
 * others to static in h5repack_copy.c.
 */
hid_t copy_named_datatype(hid_t type_in, hid_t fidout, named_dt_t **named_dt_head_p, trav_table_t *travt,
                          pack_opt_t *options);
int   named_datatype_free(named_dt_t **named_dt_head_p, int ignore_err);
int   copy_attr(hid_t loc_in, hid_t loc_out, named_dt_t **named_dt_head_p, trav_table_t *travt,
                pack_opt_t *options);

#ifdef __cplusplus
}
#endif

/*-------------------------------------------------------------------------
 * private functions
 *-------------------------------------------------------------------------
 */

/*-------------------------------------------------------------------------
 * copy module
 *-------------------------------------------------------------------------
 */

int copy_objects(const char *fnamein, const char *fnameout, pack_opt_t *options);

int do_copy_refobjs(hid_t fidin, hid_t fidout, trav_table_t *travt, pack_opt_t *options);

/*-------------------------------------------------------------------------
 * filters and verify module
 *-------------------------------------------------------------------------
 */
void init_packobject(pack_info_t *obj);

/*-------------------------------------------------------------------------
 * filters and copy module
 *-------------------------------------------------------------------------
 */

int apply_filters(const char *name,    /* object name from traverse list */
                  int         rank,    /* rank of dataset */
                  hsize_t *   dims,    /* dimensions of dataset */
                  size_t      msize,   /* size of type */
                  hid_t       dcpl_id, /* dataset creation property list */
                  pack_opt_t *options, /* repack options */
                  int *       has_filter);    /* (OUT) object NAME has a filter */

/*-------------------------------------------------------------------------
 * options table
 *-------------------------------------------------------------------------
 */
int options_table_init(pack_opttbl_t **tbl);
int options_table_free(pack_opttbl_t *table);
int options_add_layout(obj_list_t *obj_list, unsigned n_objs, pack_info_t *pack, pack_opttbl_t *table);
int options_add_filter(obj_list_t *obj_list, unsigned n_objs, filter_info_t filt, pack_opttbl_t *table);
pack_info_t *options_get_object(const char *path, pack_opttbl_t *table);

/*-------------------------------------------------------------------------
 * parse functions
 *-------------------------------------------------------------------------
 */

obj_list_t *parse_filter(const char *str, unsigned *n_objs, filter_info_t *filt, pack_opt_t *options,
                         int *is_glb);

obj_list_t *parse_layout(const char *str, unsigned *n_objs, pack_info_t *pack, /* info about object */
                         pack_opt_t *options);

#endif /* H5REPACK_H */