diff options
Diffstat (limited to 'tools/src/h5stat/h5stat.c')
| -rw-r--r-- | tools/src/h5stat/h5stat.c | 1836 |
1 files changed, 1836 insertions, 0 deletions
diff --git a/tools/src/h5stat/h5stat.c b/tools/src/h5stat/h5stat.c new file mode 100644 index 0000000..d29c6e4 --- /dev/null +++ b/tools/src/h5stat/h5stat.c @@ -0,0 +1,1836 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by The HDF Group. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the COPYING file, which can be found at the root of the source code * + * distribution tree, or in https://www.hdfgroup.org/licenses. * + * If you do not have access to either file, you may request a copy from * + * help@hdfgroup.org. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +#include "H5private.h" /* Generic Functions */ +#include "h5tools.h" +#include "h5tools_utils.h" +#include "h5tools_ref.h" +#include "h5trav.h" +#include "hdf5.h" + +/* Name of tool */ +#define PROGRAMNAME "h5stat" + +/* Parameters to control statistics gathered */ + +/* Default threshold for small groups/datasets/attributes */ +#define DEF_SIZE_SMALL_GROUPS 10 +#define DEF_SIZE_SMALL_DSETS 10 +#define DEF_SIZE_SMALL_ATTRS 10 + +#define SIZE_SMALL_SECTS 10 + +#define H5_NFILTERS_IMPL \ + 8 /* Number of currently implemented filters + one to \ + accommodate for user-define filters + one \ + to accommodate datasets without any filters */ + +/* File space management strategies: see H5Fpublic.h for declarations */ +const char *FS_STRATEGY_NAME[] = {"H5F_FSPACE_STRATEGY_FSM_AGGR", + "H5F_FSPACE_STRATEGY_PAGE", + "H5F_FSPACE_STRATEGY_AGGR", + "H5F_FSPACE_STRATEGY_NONE", + "unknown", + NULL}; + +/* Datatype statistics for datasets */ +typedef struct dtype_info_t { + hid_t tid; /* ID of datatype */ + unsigned long count; /* Number of types found */ + unsigned long named; /* Number of types that are named */ +} dtype_info_t; + +typedef struct ohdr_info_t { + hsize_t total_size; /* Total size of object headers */ + hsize_t free_size; /* Total free space in object headers */ +} ohdr_info_t; + +/* Info to pass to the iteration functions */ +typedef struct iter_t { + hid_t fid; /* File ID */ + hsize_t filesize; /* Size of the file */ + unsigned long uniq_groups; /* Number of unique groups */ + unsigned long uniq_dsets; /* Number of unique datasets */ + unsigned long uniq_dtypes; /* Number of unique named datatypes */ + unsigned long uniq_links; /* Number of unique links */ + unsigned long uniq_others; /* Number of other unique objects */ + + unsigned long max_links; /* Maximum # of links to an object */ + hsize_t max_fanout; /* Maximum fanout from a group */ + unsigned long *num_small_groups; /* Size of small groups tracked */ + unsigned group_nbins; /* Number of bins for group counts */ + unsigned long *group_bins; /* Pointer to array of bins for group counts */ + ohdr_info_t group_ohdr_info; /* Object header information for groups */ + + hsize_t max_attrs; /* Maximum attributes from a group */ + unsigned long *num_small_attrs; /* Size of small attributes tracked */ + unsigned attr_nbins; /* Number of bins for attribute counts */ + unsigned long *attr_bins; /* Pointer to array of bins for attribute counts */ + + unsigned max_dset_rank; /* Maximum rank of dataset */ + unsigned long dset_rank_count[H5S_MAX_RANK]; /* Number of datasets of each rank */ + hsize_t max_dset_dims; /* Maximum dimension size of dataset */ + unsigned long *small_dset_dims; /* Size of dimensions of small datasets tracked */ + unsigned long dset_layouts[H5D_NLAYOUTS]; /* Type of storage for each dataset */ + unsigned long dset_comptype[H5_NFILTERS_IMPL]; /* Number of currently implemented filters */ + unsigned long dset_ntypes; /* Number of diff. dataset datatypes found */ + dtype_info_t *dset_type_info; /* Pointer to dataset datatype information found */ + unsigned dset_dim_nbins; /* Number of bins for dataset dimensions */ + unsigned long *dset_dim_bins; /* Pointer to array of bins for dataset dimensions */ + ohdr_info_t dset_ohdr_info; /* Object header information for datasets */ + hsize_t dset_storage_size; /* Size of raw data for datasets */ + hsize_t dset_external_storage_size; /* Size of raw data for datasets with external storage */ + ohdr_info_t dtype_ohdr_info; /* Object header information for datatypes */ + hsize_t groups_btree_storage_size; /* btree size for group */ + hsize_t groups_heap_storage_size; /* heap size for group */ + hsize_t attrs_btree_storage_size; /* btree size for attributes (1.8) */ + hsize_t attrs_heap_storage_size; /* fractal heap size for attributes (1.8) */ + hsize_t SM_hdr_storage_size; /* header size for SOHM table (1.8) */ + hsize_t SM_index_storage_size; /* index (btree & list) size for SOHM table (1.8) */ + hsize_t SM_heap_storage_size; /* fractal heap size for SOHM table (1.8) */ + hsize_t super_size; /* superblock size */ + hsize_t super_ext_size; /* superblock extension size */ + hsize_t ublk_size; /* user block size (if exists) */ + H5F_fspace_strategy_t fs_strategy; /* File space management strategy */ + hbool_t fs_persist; /* Free-space persist or not */ + hsize_t fs_threshold; /* Free-space section threshold */ + hsize_t fsp_size; /* File space page size */ + hsize_t free_space; /* Amount of freespace in the file */ + hsize_t free_hdr; /* Size of free space manager metadata in the file */ + unsigned long num_small_sects[SIZE_SMALL_SECTS]; /* Size of small free-space sections */ + unsigned sect_nbins; /* Number of bins for free-space section sizes */ + unsigned long *sect_bins; /* Pointer to array of bins for free-space section sizes */ + hsize_t datasets_index_storage_size; /* meta size for chunked dataset's indexing type */ + hsize_t datasets_heap_storage_size; /* heap size for dataset with external storage */ + unsigned long nexternal; /* Number of external files for a dataset */ + int local; /* Flag to indicate iteration over the object*/ +} iter_t; + +static const char *drivername = NULL; + +#ifdef H5_HAVE_ROS3_VFD +/* Default "anonymous" S3 configuration */ +static H5FD_ros3_fapl_t ros3_fa = { + 1, /* Structure Version */ + FALSE, /* Authenticate? */ + "", /* AWS Region */ + "", /* Access Key ID */ + "", /* Secret Access Key */ +}; +#endif /* H5_HAVE_ROS3_VFD */ + +#ifdef H5_HAVE_LIBHDFS +/* "Default" HDFS configuration */ +static H5FD_hdfs_fapl_t hdfs_fa = { + 1, /* Structure Version */ + "localhost", /* Namenode Name */ + 0, /* Namenode Port */ + "", /* Kerberos ticket cache */ + "", /* User name */ + 2048, /* Stream buffer size */ +}; +#endif /* H5_HAVE_LIBHDFS */ + +static int display_all = TRUE; + +/* Enable the printing of selected statistics */ +static int display_file = FALSE; /* display file information */ +static int display_group = FALSE; /* display groups information */ +static int display_dset = FALSE; /* display datasets information */ +static int display_dset_dtype_meta = FALSE; /* display datasets' datatype information */ +static int display_attr = FALSE; /* display attributes information */ +static int display_free_sections = FALSE; /* display free space information */ +static int display_summary = FALSE; /* display summary of file space information */ + +static int display_file_metadata = FALSE; /* display file space info for file's metadata */ +static int display_group_metadata = FALSE; /* display file space info for groups' metadata */ +static int display_dset_metadata = FALSE; /* display file space info for datasets' metadata */ + +static int display_object = FALSE; /* not implemented yet */ + +/* Initialize threshold for small groups/datasets/attributes */ +static int sgroups_threshold = DEF_SIZE_SMALL_GROUPS; +static int sdsets_threshold = DEF_SIZE_SMALL_DSETS; +static int sattrs_threshold = DEF_SIZE_SMALL_ATTRS; + +/* a structure for handling the order command-line parameters come in */ +struct handler_t { + size_t obj_count; + char **obj; +}; + +static const char *s_opts = "Aa:Ddm:E*FfhGgl:sSTO:Vw:H:"; +/* e.g. "filemetadata" has to precede "file"; "groupmetadata" has to precede "group" etc. */ +static struct h5_long_options l_opts[] = {{"help", no_arg, 'h'}, + {"filemetadata", no_arg, 'F'}, + {"groupmetadata", no_arg, 'G'}, + {"links", require_arg, 'l'}, + {"dsetmetadata", no_arg, 'D'}, + {"dims", require_arg, 'm'}, + {"dtypemetadata", no_arg, 'T'}, + {"object", require_arg, 'O'}, + {"version", no_arg, 'V'}, + {"attribute", no_arg, 'A'}, + {"enable-error-stack", optional_arg, 'E'}, + {"numattrs", require_arg, 'a'}, + {"freespace", no_arg, 's'}, + {"summary", no_arg, 'S'}, + {"s3-cred", require_arg, 'w'}, + {"hdfs-attrs", require_arg, 'H'}, + {NULL, 0, '\0'}}; + +static void +leave(int ret) +{ + h5tools_close(); + HDexit(ret); +} + +/*------------------------------------------------------------------------- + * Function: usage + * + * Purpose: Compute the ceiling of log_10(x) + * + * Return: >0 on success, 0 on failure + * + *------------------------------------------------------------------------- + */ +static void +usage(const char *prog) +{ + HDfflush(stdout); + HDfprintf(stdout, "Usage: %s [OPTIONS] file\n", prog); + HDfprintf(stdout, "\n"); + HDfprintf(stdout, " ERROR\n"); + HDfprintf(stdout, " --enable-error-stack Prints messages from the HDF5 error stack as they occur\n"); + HDfprintf(stdout, " Optional value 2 also prints file open errors\n"); + HDfprintf(stdout, " OPTIONS\n"); + HDfprintf(stdout, " -h, --help Print a usage message and exit\n"); + HDfprintf(stdout, " -V, --version Print version number and exit\n"); + HDfprintf(stdout, " -f, --file Print file information\n"); + HDfprintf(stdout, " -F, --filemetadata Print file space information for file's metadata\n"); + HDfprintf(stdout, " -g, --group Print group information\n"); + HDfprintf(stdout, " -l N, --links=N Set the threshold for the # of links when printing\n"); + HDfprintf(stdout, " information for small groups. N is an integer greater\n"); + HDfprintf(stdout, " than 0. The default threshold is 10.\n"); + HDfprintf(stdout, " -G, --groupmetadata Print file space information for groups' metadata\n"); + HDfprintf(stdout, " -d, --dset Print dataset information\n"); + HDfprintf(stdout, " -m N, --dims=N Set the threshold for the dimension sizes when printing\n"); + HDfprintf(stdout, + " information for small datasets. N is an integer greater\n"); + HDfprintf(stdout, " than 0. The default threshold is 10.\n"); + HDfprintf(stdout, " -D, --dsetmetadata Print file space information for datasets' metadata\n"); + HDfprintf(stdout, " -T, --dtypemetadata Print datasets' datatype information\n"); + HDfprintf(stdout, " -A, --attribute Print attribute information\n"); + HDfprintf(stdout, " -a N, --numattrs=N Set the threshold for the # of attributes when printing\n"); + HDfprintf(stdout, + " information for small # of attributes. N is an integer greater\n"); + HDfprintf(stdout, " than 0. The default threshold is 10.\n"); + HDfprintf(stdout, " -s, --freespace Print free space information\n"); + HDfprintf(stdout, " -S, --summary Print summary of file space information\n"); + HDfprintf(stdout, " --s3-cred=<cred> Access file on S3, using provided credential\n"); + HDfprintf(stdout, " <cred> :: (region,id,key)\n"); + HDfprintf(stdout, " If <cred> == \"(,,)\", no authentication is used.\n"); + HDfprintf(stdout, " --hdfs-attrs=<attrs> Access a file on HDFS with given configuration\n"); + HDfprintf(stdout, " attributes.\n"); + HDfprintf(stdout, " <attrs> :: (<namenode name>,<namenode port>,\n"); + HDfprintf(stdout, " <kerberos cache path>,<username>,\n"); + HDfprintf(stdout, " <buffer size>)\n"); + HDfprintf(stdout, " If an attribute is empty, a default value will be\n"); + HDfprintf(stdout, " used.\n"); +} + +/*------------------------------------------------------------------------- + * Function: ceil_log10 + * + * Purpose: Compute the ceiling of log_10(x) + * + * Return: >0 on success, 0 on failure + * + * Programmer: Quincey Koziol + * Monday, August 22, 2005 + * + *------------------------------------------------------------------------- + */ +H5_ATTR_CONST static unsigned +ceil_log10(unsigned long x) +{ + unsigned long pow10 = 1; + unsigned ret = 0; + + while (x >= pow10) { + pow10 *= 10; + ret++; + } /* end while */ + + return ret; +} /* ceil_log10() */ + +/*------------------------------------------------------------------------- + * Function: attribute_stats + * + * Purpose: Gather statistics about attributes on an object + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, July 17, 2007 + * + *------------------------------------------------------------------------- + */ +static herr_t +attribute_stats(iter_t *iter, const H5O_info2_t *oi, const H5O_native_info_t *native_oi) +{ + unsigned bin; /* "bin" the number of objects falls in */ + + /* Update dataset & attribute metadata info */ + iter->attrs_btree_storage_size += native_oi->meta_size.attr.index_size; + iter->attrs_heap_storage_size += native_oi->meta_size.attr.heap_size; + + /* Update small # of attribute count & limits */ + if (oi->num_attrs <= (hsize_t)sattrs_threshold) + (iter->num_small_attrs[(size_t)oi->num_attrs])++; + if (oi->num_attrs > iter->max_attrs) + iter->max_attrs = oi->num_attrs; + + /* Add attribute count to proper bin */ + bin = ceil_log10((unsigned long)oi->num_attrs); + if ((bin + 1) > iter->attr_nbins) { + iter->attr_bins = (unsigned long *)HDrealloc(iter->attr_bins, (bin + 1) * sizeof(unsigned long)); + HDassert(iter->attr_bins); + + /* Initialize counts for intermediate bins */ + while (iter->attr_nbins < bin) + iter->attr_bins[iter->attr_nbins++] = 0; + iter->attr_nbins++; + + /* Initialize count for new bin */ + iter->attr_bins[bin] = 1; + } /* end if */ + else + (iter->attr_bins[bin])++; + + return 0; +} /* end attribute_stats() */ + +/*------------------------------------------------------------------------- + * Function: group_stats + * + * Purpose: Gather statistics about the group + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, August 16, 2005 + * + * Modifications: Refactored code from the walk_function + * EIP, Wednesday, August 16, 2006 + * + * Vailin Choi 12 July 2007 + * 1. Gathered storage info for btree and heap + * (groups and attributes) + * 2. Gathered info for attributes + * + * Vailin Choi 14 July 2007 + * Cast "num_objs" and "num_attrs" to size_t + * Due to the -Mbounds problem for the pgi-32 bit compiler on indexing + * + *------------------------------------------------------------------------- + */ +static herr_t +group_stats(iter_t *iter, const char *name, const H5O_info2_t *oi, const H5O_native_info_t *native_oi) +{ + H5G_info_t ginfo; /* Group information */ + unsigned bin; /* "bin" the number of objects falls in */ + herr_t ret_value = SUCCEED; + + /* Gather statistics about this type of object */ + iter->uniq_groups++; + + /* Get object header information */ + iter->group_ohdr_info.total_size += native_oi->hdr.space.total; + iter->group_ohdr_info.free_size += native_oi->hdr.space.free; + + /* Get group information */ + if ((ret_value = H5Gget_info_by_name(iter->fid, name, &ginfo, H5P_DEFAULT)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Gget_info_by_name() failed"); + + /* Update link stats */ + /* Collect statistics for small groups */ + if (ginfo.nlinks < (hsize_t)sgroups_threshold) + (iter->num_small_groups[(size_t)ginfo.nlinks])++; + /* Determine maximum link count */ + if (ginfo.nlinks > iter->max_fanout) + iter->max_fanout = ginfo.nlinks; + + /* Add group count to proper bin */ + bin = ceil_log10((unsigned long)ginfo.nlinks); + if ((bin + 1) > iter->group_nbins) { + /* Allocate more storage for info about dataset's datatype */ + if ((iter->group_bins = + (unsigned long *)HDrealloc(iter->group_bins, (bin + 1) * sizeof(unsigned long))) == NULL) + H5TOOLS_GOTO_ERROR(FAIL, "H5Drealloc() failed"); + + /* Initialize counts for intermediate bins */ + while (iter->group_nbins < bin) + iter->group_bins[iter->group_nbins++] = 0; + iter->group_nbins++; + + /* Initialize count for new bin */ + iter->group_bins[bin] = 1; + } /* end if */ + else + (iter->group_bins[bin])++; + + /* Update group metadata info */ + iter->groups_btree_storage_size += native_oi->meta_size.obj.index_size; + iter->groups_heap_storage_size += native_oi->meta_size.obj.heap_size; + + /* Update attribute metadata info */ + if ((ret_value = attribute_stats(iter, oi, native_oi)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "attribute_stats failed"); + +done: + return ret_value; +} /* end group_stats() */ + +/*------------------------------------------------------------------------- + * Function: dataset_stats + * + * Purpose: Gather statistics about the dataset + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, August 16, 2005 + * + *------------------------------------------------------------------------- + */ +static herr_t +dataset_stats(iter_t *iter, const char *name, const H5O_info2_t *oi, const H5O_native_info_t *native_oi) +{ + unsigned bin; /* "bin" the number of objects falls in */ + hid_t did; /* Dataset ID */ + hid_t sid; /* Dataspace ID */ + hid_t tid; /* Datatype ID */ + hid_t dcpl; /* Dataset creation property list ID */ + hsize_t dims[H5S_MAX_RANK]; /* Dimensions of dataset */ + H5D_layout_t lout; /* Layout of dataset */ + unsigned type_found; /* Whether the dataset's datatype was */ + /* already found */ + int ndims; /* Number of dimensions of dataset */ + hsize_t storage; /* Size of dataset storage */ + unsigned u; /* Local index variable */ + int num_ext; /* Number of external files for a dataset */ + int nfltr; /* Number of filters for a dataset */ + H5Z_filter_t fltr; /* Filter identifier */ + herr_t ret_value = SUCCEED; + + /* Gather statistics about this type of object */ + iter->uniq_dsets++; + + /* Get object header information */ + iter->dset_ohdr_info.total_size += native_oi->hdr.space.total; + iter->dset_ohdr_info.free_size += native_oi->hdr.space.free; + + if ((did = H5Dopen2(iter->fid, name, H5P_DEFAULT)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Dopen() failed"); + + /* Update dataset metadata info */ + iter->datasets_index_storage_size += native_oi->meta_size.obj.index_size; + iter->datasets_heap_storage_size += native_oi->meta_size.obj.heap_size; + + /* Update attribute metadata info */ + if ((ret_value = attribute_stats(iter, oi, native_oi)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "attribute_stats() failed"); + + /* Get storage info */ + /* Failure 0 indistinguishable from no-data-stored 0 */ + storage = H5Dget_storage_size(did); + + /* Gather layout statistics */ + if ((dcpl = H5Dget_create_plist(did)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Dget_create_plist() failed"); + + if ((lout = H5Pget_layout(dcpl)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Pget_layout() failed"); + + /* Object header's total size for H5D_COMPACT layout includes raw data size */ + /* "storage" also includes H5D_COMPACT raw data size */ + if (lout == H5D_COMPACT) + iter->dset_ohdr_info.total_size -= storage; + + /* Track the layout type for dataset */ + (iter->dset_layouts[lout])++; + + /* Get the number of external files for the dataset */ + if ((num_ext = H5Pget_external_count(dcpl)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Pget_external_count() failed"); + + /* Accumulate raw data size accordingly */ + if (num_ext) { + iter->nexternal += (unsigned long)num_ext; + iter->dset_external_storage_size += (unsigned long)storage; + } + else + iter->dset_storage_size += storage; + + /* Gather dataspace statistics */ + if ((sid = H5Dget_space(did)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Sget_space() failed"); + + if ((ndims = H5Sget_simple_extent_dims(sid, dims, NULL)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Sget_simple_extent_dims() failed"); + + /* Check for larger rank of dataset */ + if ((unsigned)ndims > iter->max_dset_rank) + iter->max_dset_rank = (unsigned)ndims; + + /* Track the number of datasets with each rank */ + (iter->dset_rank_count[ndims])++; + + /* Only gather dim size statistics on 1-D datasets */ + if (ndims == 1) { + /* Determine maximum dimension size */ + if (dims[0] > iter->max_dset_dims) + iter->max_dset_dims = dims[0]; + /* Collect statistics for small datasets */ + if (dims[0] < (hsize_t)sdsets_threshold) + (iter->small_dset_dims[(size_t)dims[0]])++; + + /* Add dim count to proper bin */ + bin = ceil_log10((unsigned long)dims[0]); + if ((bin + 1) > iter->dset_dim_nbins) { + /* Allocate more storage for info about dataset's datatype */ + if ((iter->dset_dim_bins = (unsigned long *)HDrealloc(iter->dset_dim_bins, + (bin + 1) * sizeof(unsigned long))) == NULL) + H5TOOLS_GOTO_ERROR(FAIL, "H5Drealloc() failed"); + + /* Initialize counts for intermediate bins */ + while (iter->dset_dim_nbins < bin) + iter->dset_dim_bins[iter->dset_dim_nbins++] = 0; + iter->dset_dim_nbins++; + + /* Initialize count for this bin */ + iter->dset_dim_bins[bin] = 1; + } /* end if */ + else + (iter->dset_dim_bins[bin])++; + } /* end if */ + + if (H5Sclose(sid) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Sclose() failed"); + + /* Gather datatype statistics */ + if ((tid = H5Dget_type(did)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Dget_type() failed"); + + type_found = FALSE; + for (u = 0; u < iter->dset_ntypes; u++) + if (H5Tequal(iter->dset_type_info[u].tid, tid) > 0) { + type_found = TRUE; + break; + } /* end for */ + + if (type_found) + (iter->dset_type_info[u].count)++; + else { + unsigned curr_ntype = (unsigned)iter->dset_ntypes; + + /* Increment # of datatypes seen for datasets */ + iter->dset_ntypes++; + + /* Allocate more storage for info about dataset's datatype */ + if ((iter->dset_type_info = (dtype_info_t *)HDrealloc( + iter->dset_type_info, iter->dset_ntypes * sizeof(dtype_info_t))) == NULL) + H5TOOLS_GOTO_ERROR(FAIL, "H5Drealloc() failed"); + + /* Initialize information about datatype */ + if ((iter->dset_type_info[curr_ntype].tid = H5Tcopy(tid)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Tcopy() failed"); + iter->dset_type_info[curr_ntype].count = 1; + iter->dset_type_info[curr_ntype].named = 0; + + /* Set index for later */ + u = curr_ntype; + } /* end else */ + + /* Check if the datatype is a named datatype */ + if (H5Tcommitted(tid) > 0) + (iter->dset_type_info[u].named)++; + + if (H5Tclose(tid) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Tclose() failed"); + + /* Track different filters */ + if ((nfltr = H5Pget_nfilters(dcpl)) >= 0) { + if (nfltr == 0) + iter->dset_comptype[0]++; + for (u = 0; u < (unsigned)nfltr; u++) { + fltr = H5Pget_filter2(dcpl, u, 0, 0, 0, 0, 0, NULL); + if (fltr >= 0) { + if (fltr < (H5_NFILTERS_IMPL - 1)) + iter->dset_comptype[fltr]++; + else + iter->dset_comptype[H5_NFILTERS_IMPL - 1]++; /*other filters*/ + } /* end if */ + } /* end for */ + } /* endif nfltr */ + + if (H5Pclose(dcpl) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Pclose() failed"); + + if (H5Dclose(did) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Dclose() failed"); + +done: + return ret_value; +} /* end dataset_stats() */ + +/*------------------------------------------------------------------------- + * Function: datatype_stats + * + * Purpose: Gather statistics about the datatype + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Vailin Choi; July 7th, 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +datatype_stats(iter_t *iter, const H5O_info2_t *oi, const H5O_native_info_t *native_oi) +{ + herr_t ret_value = SUCCEED; + + /* Gather statistics about this type of object */ + iter->uniq_dtypes++; + + /* Get object header information */ + iter->dtype_ohdr_info.total_size += native_oi->hdr.space.total; + iter->dtype_ohdr_info.free_size += native_oi->hdr.space.free; + + /* Update attribute metadata info */ + if ((ret_value = attribute_stats(iter, oi, native_oi)) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "attribute_stats() failed"); +done: + return ret_value; +} /* end datatype_stats() */ + +/*------------------------------------------------------------------------- + * Function: obj_stats + * + * Purpose: Gather statistics about an object + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, November 6, 2007 + * + *------------------------------------------------------------------------- + */ +static herr_t +obj_stats(const char *path, const H5O_info2_t *oi, const char *already_visited, void *_iter) +{ + H5O_native_info_t native_info; + iter_t *iter = (iter_t *)_iter; + herr_t ret_value = SUCCEED; + + /* If the object has already been seen then just return */ + if (NULL == already_visited) { + /* Retrieve the native info for the object */ + if (H5Oget_native_info_by_name(iter->fid, path, &native_info, H5O_NATIVE_INFO_ALL, H5P_DEFAULT) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "H5Oget_native_info_by_name failed"); + + /* Gather some general statistics about the object */ + if (oi->rc > iter->max_links) + iter->max_links = oi->rc; + + switch (oi->type) { + case H5O_TYPE_GROUP: + if (group_stats(iter, path, oi, &native_info) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "group_stats failed"); + break; + + case H5O_TYPE_DATASET: + if (dataset_stats(iter, path, oi, &native_info) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "dataset_stats failed"); + break; + + case H5O_TYPE_NAMED_DATATYPE: + if (datatype_stats(iter, oi, &native_info) < 0) + H5TOOLS_GOTO_ERROR(FAIL, "datatype_stats failed"); + break; + + case H5O_TYPE_MAP: + case H5O_TYPE_UNKNOWN: + case H5O_TYPE_NTYPES: + default: + /* Gather statistics about this type of object */ + iter->uniq_others++; + break; + } /* end switch */ + } /* end if */ + +done: + return ret_value; +} /* end obj_stats() */ + +/*------------------------------------------------------------------------- + * Function: lnk_stats + * + * Purpose: Gather statistics about a link + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, November 6, 2007 + * + *------------------------------------------------------------------------- + */ +static herr_t +lnk_stats(const char H5_ATTR_UNUSED *path, const H5L_info2_t *li, void *_iter) +{ + iter_t *iter = (iter_t *)_iter; + + switch (li->type) { + case H5L_TYPE_SOFT: + case H5L_TYPE_EXTERNAL: + /* Gather statistics about links and UD links */ + iter->uniq_links++; + break; + + case H5L_TYPE_HARD: + case H5L_TYPE_MAX: + case H5L_TYPE_ERROR: + default: + /* Gather statistics about this type of object */ + iter->uniq_others++; + break; + } /* end switch() */ + + return 0; +} /* end lnk_stats() */ + +/*------------------------------------------------------------------------- + * Function: freespace_stats + * + * Purpose: Gather statistics for free space sections in the file + * + * Return: Success: 0 + * Failure: -1 + * + * Programmer: Vailin Choi; July 7th, 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +freespace_stats(hid_t fid, iter_t *iter) +{ + H5F_sect_info_t *sect_info = NULL; /* Free space sections */ + ssize_t nsects; /* Number of free space sections */ + size_t u; /* Local index variable */ + + /* Query section information */ + if ((nsects = H5Fget_free_sections(fid, H5FD_MEM_DEFAULT, 0, NULL)) < 0) + return (FAIL); + else if (nsects) { + if (NULL == (sect_info = (H5F_sect_info_t *)HDcalloc((size_t)nsects, sizeof(H5F_sect_info_t)))) + return (FAIL); + nsects = H5Fget_free_sections(fid, H5FD_MEM_DEFAULT, (size_t)nsects, sect_info); + HDassert(nsects); + } /* end else-if */ + + for (u = 0; u < (size_t)nsects; u++) { + unsigned bin; /* "bin" the number of objects falls in */ + + if (sect_info[u].size < SIZE_SMALL_SECTS) + (iter->num_small_sects[(size_t)sect_info[u].size])++; + + /* Add section size to proper bin */ + bin = ceil_log10((unsigned long)sect_info[u].size); + if (bin >= iter->sect_nbins) { + /* Allocate more storage for section info */ + iter->sect_bins = (unsigned long *)HDrealloc(iter->sect_bins, (bin + 1) * sizeof(unsigned long)); + HDassert(iter->sect_bins); + + /* Initialize counts for intermediate bins */ + while (iter->sect_nbins < bin) + iter->sect_bins[iter->sect_nbins++] = 0; + iter->sect_nbins++; + + /* Initialize count for this bin */ + iter->sect_bins[bin] = 1; + } /* end if */ + else + (iter->sect_bins[bin])++; + } /* end for */ + + if (sect_info) + HDfree(sect_info); + + return 0; +} /* end freespace_stats() */ + +/*------------------------------------------------------------------------- + * Function: hand_free + * + * Purpose: Free handler structure + * + * Return: Success: 0 + * + * Failure: Never fails + * + *------------------------------------------------------------------------- + */ +static void +hand_free(struct handler_t *hand) +{ + if (hand) { + unsigned u; + + for (u = 0; u < hand->obj_count; u++) + if (hand->obj[u]) { + HDfree(hand->obj[u]); + hand->obj[u] = NULL; + } /* end if */ + hand->obj_count = 0; + HDfree(hand->obj); + HDfree(hand); + } /* end if */ +} /* end hand_free() */ + +/*------------------------------------------------------------------------- + * Function: parse_command_line + * + * Purpose: Parses command line and sets up global variable to control output + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + *------------------------------------------------------------------------- + */ +static int +parse_command_line(int argc, const char *const *argv, struct handler_t **hand_ret) +{ + int opt; + unsigned u; + struct handler_t *hand = NULL; + + /* parse command line options */ + while ((opt = H5_get_option(argc, argv, s_opts, l_opts)) != EOF) { + switch ((char)opt) { + case 'h': + usage(h5tools_getprogname()); + h5tools_setstatus(EXIT_SUCCESS); + goto done; + break; + + case 'V': + print_version(h5tools_getprogname()); + h5tools_setstatus(EXIT_SUCCESS); + goto done; + break; + + case 'E': + if (H5_optarg != NULL) + enable_error_stack = HDatoi(H5_optarg); + else + enable_error_stack = 1; + break; + + case 'F': + display_all = FALSE; + display_file_metadata = TRUE; + break; + + case 'f': + display_all = FALSE; + display_file = TRUE; + break; + + case 'G': + display_all = FALSE; + display_group_metadata = TRUE; + break; + + case 'g': + display_all = FALSE; + display_group = TRUE; + break; + + case 'l': + if (H5_optarg) { + sgroups_threshold = HDatoi(H5_optarg); + if (sgroups_threshold < 1) { + error_msg("Invalid threshold for small groups\n"); + goto error; + } + } + else + error_msg("Missing threshold for small groups\n"); + + break; + + case 'D': + display_all = FALSE; + display_dset_metadata = TRUE; + break; + + case 'd': + display_all = FALSE; + display_dset = TRUE; + break; + + case 'm': + if (H5_optarg) { + sdsets_threshold = HDatoi(H5_optarg); + if (sdsets_threshold < 1) { + error_msg("Invalid threshold for small datasets\n"); + goto error; + } + } + else + error_msg("Missing threshold for small datasets\n"); + + break; + + case 'T': + display_all = FALSE; + display_dset_dtype_meta = TRUE; + break; + + case 'A': + display_all = FALSE; + display_attr = TRUE; + break; + + case 'a': + if (H5_optarg) { + sattrs_threshold = HDatoi(H5_optarg); + if (sattrs_threshold < 1) { + error_msg("Invalid threshold for small # of attributes\n"); + goto error; + } + } + else + error_msg("Missing threshold for small # of attributes\n"); + + break; + + case 's': + display_all = FALSE; + display_free_sections = TRUE; + break; + + case 'S': + display_all = FALSE; + display_summary = TRUE; + break; + + case 'O': + display_all = FALSE; + display_object = TRUE; + + /* Allocate space to hold the command line info */ + if (NULL == (hand = (struct handler_t *)HDcalloc((size_t)1, sizeof(struct handler_t)))) { + error_msg("unable to allocate memory for object struct\n"); + goto error; + } /* end if */ + + /* Allocate space to hold the object strings */ + hand->obj_count = (size_t)argc; + if (NULL == (hand->obj = (char **)HDcalloc((size_t)argc, sizeof(char *)))) { + error_msg("unable to allocate memory for object array\n"); + goto error; + } /* end if */ + + /* Store object names */ + for (u = 0; u < hand->obj_count; u++) + if (NULL == (hand->obj[u] = HDstrdup(H5_optarg))) { + error_msg("unable to allocate memory for object name\n"); + goto error; + } /* end if */ + break; + + case 'w': +#ifdef H5_HAVE_ROS3_VFD + if (h5tools_parse_ros3_fapl_tuple(H5_optarg, ',', &ros3_fa) < 0) { + error_msg("failed to parse S3 VFD credential info\n"); + goto error; + } + + drivername = drivernames[ROS3_VFD_IDX]; +#else + error_msg("Read-Only S3 VFD not enabled.\n"); + goto error; +#endif + break; + + case 'H': +#ifdef H5_HAVE_LIBHDFS + if (h5tools_parse_hdfs_fapl_tuple(H5_optarg, ',', &hdfs_fa) < 0) { + error_msg("failed to parse HDFS VFD configuration info\n"); + goto error; + } + + drivername = drivernames[HDFS_VFD_IDX]; +#else + error_msg("HDFS VFD not enabled.\n"); + goto error; +#endif + break; + + default: + usage(h5tools_getprogname()); + goto error; + } /* end switch */ + } /* end while */ + + /* check for file name to be processed */ + if (argc <= H5_optind) { + error_msg("missing file name\n"); + usage(h5tools_getprogname()); + goto error; + } /* end if */ + + /* Set handler structure */ + *hand_ret = hand; + +done: + return 0; + +error: + hand_free(hand); + h5tools_setstatus(EXIT_FAILURE); + + return -1; +} + +/*------------------------------------------------------------------------- + * Function: iter_free + * + * Purpose: Free iter structure + * + * Return: Success: 0 + * + * Failure: Never fails + * + *------------------------------------------------------------------------- + */ +static void +iter_free(iter_t *iter) +{ + + /* Clear array of bins for group counts */ + if (iter->group_bins) { + HDfree(iter->group_bins); + iter->group_bins = NULL; + } /* end if */ + + /* Clear array for tracking small groups */ + if (iter->num_small_groups) { + HDfree(iter->num_small_groups); + iter->num_small_groups = NULL; + } /* end if */ + + /* Clear array of bins for attribute counts */ + if (iter->attr_bins) { + HDfree(iter->attr_bins); + iter->attr_bins = NULL; + } /* end if */ + + /* Clear array for tracking small attributes */ + if (iter->num_small_attrs) { + HDfree(iter->num_small_attrs); + iter->num_small_attrs = NULL; + } /* end if */ + + /* Clear dataset datatype information found */ + if (iter->dset_type_info) { + HDfree(iter->dset_type_info); + iter->dset_type_info = NULL; + } /* end if */ + + /* Clear array of bins for dataset dimensions */ + if (iter->dset_dim_bins) { + HDfree(iter->dset_dim_bins); + iter->dset_dim_bins = NULL; + } /* end if */ + + /* Clear array of tracking 1-D small datasets */ + if (iter->small_dset_dims) { + HDfree(iter->small_dset_dims); + iter->small_dset_dims = NULL; + } /* end if */ + + /* Clear array of bins for free-space section sizes */ + if (iter->sect_bins) { + HDfree(iter->sect_bins); + iter->sect_bins = NULL; + } /* end if */ +} /* end iter_free() */ + +/*------------------------------------------------------------------------- + * Function: print_file_info + * + * Purpose: Prints information about file + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +print_file_info(const iter_t *iter) +{ + HDprintf("File information\n"); + HDprintf("\t# of unique groups: %lu\n", iter->uniq_groups); + HDprintf("\t# of unique datasets: %lu\n", iter->uniq_dsets); + HDprintf("\t# of unique named datatypes: %lu\n", iter->uniq_dtypes); + HDprintf("\t# of unique links: %lu\n", iter->uniq_links); + HDprintf("\t# of unique other: %lu\n", iter->uniq_others); + HDprintf("\tMax. # of links to object: %lu\n", iter->max_links); + HDfprintf(stdout, "\tMax. # of objects in group: %" PRIuHSIZE "\n", iter->max_fanout); + + return 0; +} /* print_file_info() */ + +/*------------------------------------------------------------------------- + * Function: print_file_metadata + * + * Purpose: Prints file space information for file's metadata + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_file_metadata(const iter_t *iter) +{ + HDfprintf(stdout, "File space information for file metadata (in bytes):\n"); + HDfprintf(stdout, "\tSuperblock: %" PRIuHSIZE "\n", iter->super_size); + HDfprintf(stdout, "\tSuperblock extension: %" PRIuHSIZE "\n", iter->super_ext_size); + HDfprintf(stdout, "\tUser block: %" PRIuHSIZE "\n", iter->ublk_size); + + HDfprintf(stdout, "\tObject headers: (total/unused)\n"); + HDfprintf(stdout, "\t\tGroups: %" PRIuHSIZE "/%" PRIuHSIZE "\n", iter->group_ohdr_info.total_size, + iter->group_ohdr_info.free_size); + HDfprintf(stdout, "\t\tDatasets(exclude compact data): %" PRIuHSIZE "/%" PRIuHSIZE "\n", + iter->dset_ohdr_info.total_size, iter->dset_ohdr_info.free_size); + HDfprintf(stdout, "\t\tDatatypes: %" PRIuHSIZE "/%" PRIuHSIZE "\n", iter->dtype_ohdr_info.total_size, + iter->dtype_ohdr_info.free_size); + + HDfprintf(stdout, "\tGroups:\n"); + HDfprintf(stdout, "\t\tB-tree/List: %" PRIuHSIZE "\n", iter->groups_btree_storage_size); + HDfprintf(stdout, "\t\tHeap: %" PRIuHSIZE "\n", iter->groups_heap_storage_size); + + HDfprintf(stdout, "\tAttributes:\n"); + HDfprintf(stdout, "\t\tB-tree/List: %" PRIuHSIZE "\n", iter->attrs_btree_storage_size); + HDfprintf(stdout, "\t\tHeap: %" PRIuHSIZE "\n", iter->attrs_heap_storage_size); + + HDfprintf(stdout, "\tChunked datasets:\n"); + HDfprintf(stdout, "\t\tIndex: %" PRIuHSIZE "\n", iter->datasets_index_storage_size); + + HDfprintf(stdout, "\tDatasets:\n"); + HDfprintf(stdout, "\t\tHeap: %" PRIuHSIZE "\n", iter->datasets_heap_storage_size); + + HDfprintf(stdout, "\tShared Messages:\n"); + HDfprintf(stdout, "\t\tHeader: %" PRIuHSIZE "\n", iter->SM_hdr_storage_size); + HDfprintf(stdout, "\t\tB-tree/List: %" PRIuHSIZE "\n", iter->SM_index_storage_size); + HDfprintf(stdout, "\t\tHeap: %" PRIuHSIZE "\n", iter->SM_heap_storage_size); + + HDfprintf(stdout, "\tFree-space managers:\n"); + HDfprintf(stdout, "\t\tHeader: %" PRIuHSIZE "\n", iter->free_hdr); + HDfprintf(stdout, "\t\tAmount of free space: %" PRIuHSIZE "\n", iter->free_space); + + return 0; +} /* print_file_metadata() */ + +/*------------------------------------------------------------------------- + * Function: print_group_info + * + * Purpose: Prints information about groups in the file + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + * Modifications: + * bug #1253; Oct 6th 2008; Vailin Choi + * Fixed segmentation fault: print iter->group_bins[0] when + * there is iter->group_nbins + * + *------------------------------------------------------------------------- + */ +static herr_t +print_group_info(const iter_t *iter) +{ + unsigned long power; /* Temporary "power" for bins */ + unsigned long total; /* Total count for various statistics */ + unsigned u; /* Local index variable */ + + HDprintf("Small groups (with 0 to %u links):\n", sgroups_threshold - 1); + total = 0; + for (u = 0; u < (unsigned)sgroups_threshold; u++) { + if (iter->num_small_groups[u] > 0) { + HDprintf("\t# of groups with %u link(s): %lu\n", u, iter->num_small_groups[u]); + total += iter->num_small_groups[u]; + } /* end if */ + } /* end for */ + HDprintf("\tTotal # of small groups: %lu\n", total); + + HDprintf("Group bins:\n"); + total = 0; + if ((iter->group_nbins > 0) && (iter->group_bins[0] > 0)) { + HDprintf("\t# of groups with 0 link: %lu\n", iter->group_bins[0]); + total = iter->group_bins[0]; + } /* end if */ + power = 1; + for (u = 1; u < iter->group_nbins; u++) { + if (iter->group_bins[u] > 0) { + HDprintf("\t# of groups with %lu - %lu links: %lu\n", power, (power * 10) - 1, + iter->group_bins[u]); + total += iter->group_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + HDprintf("\tTotal # of groups: %lu\n", total); + + return 0; +} /* print_group_info() */ + +/*------------------------------------------------------------------------- + * Function: print_group_metadata + * + * Purpose: Prints file space information for groups' metadata + * + * Return: Success: 0 + * Failure: Never fails + * + * Programmer: Vailin Choi; October 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_group_metadata(const iter_t *iter) +{ + HDprintf("File space information for groups' metadata (in bytes):\n"); + + HDfprintf(stdout, "\tObject headers (total/unused): %" PRIuHSIZE "/%" PRIuHSIZE "\n", + iter->group_ohdr_info.total_size, iter->group_ohdr_info.free_size); + + HDfprintf(stdout, "\tB-tree/List: %" PRIuHSIZE "\n", iter->groups_btree_storage_size); + HDfprintf(stdout, "\tHeap: %" PRIuHSIZE "\n", iter->groups_heap_storage_size); + + return 0; +} /* print_group_metadata() */ + +/*------------------------------------------------------------------------- + * Function: print_dataset_info + * + * Purpose: Prints information about datasets in the file + * + * Return: Success: 0 + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_dataset_info(const iter_t *iter) +{ + unsigned long power; /* Temporary "power" for bins */ + unsigned long total; /* Total count for various statistics */ + unsigned u; /* Local index variable */ + + if (iter->uniq_dsets > 0) { + HDprintf("Dataset dimension information:\n"); + HDprintf("\tMax. rank of datasets: %u\n", iter->max_dset_rank); + HDprintf("\tDataset ranks:\n"); + for (u = 0; u < H5S_MAX_RANK; u++) + if (iter->dset_rank_count[u] > 0) + HDprintf("\t\t# of dataset with rank %u: %lu\n", u, iter->dset_rank_count[u]); + + HDprintf("1-D Dataset information:\n"); + HDfprintf(stdout, "\tMax. dimension size of 1-D datasets: %" PRIuHSIZE "\n", iter->max_dset_dims); + HDprintf("\tSmall 1-D datasets (with dimension sizes 0 to %u):\n", sdsets_threshold - 1); + total = 0; + for (u = 0; u < (unsigned)sdsets_threshold; u++) { + if (iter->small_dset_dims[u] > 0) { + HDprintf("\t\t# of datasets with dimension sizes %u: %lu\n", u, iter->small_dset_dims[u]); + total += iter->small_dset_dims[u]; + } /* end if */ + } /* end for */ + HDprintf("\t\tTotal # of small datasets: %lu\n", total); + + /* Protect against no datasets in file */ + if (iter->dset_dim_nbins > 0) { + HDprintf("\t1-D Dataset dimension bins:\n"); + total = 0; + if (iter->dset_dim_bins[0] > 0) { + HDprintf("\t\t# of datasets with dimension size 0: %lu\n", iter->dset_dim_bins[0]); + total = iter->dset_dim_bins[0]; + } /* end if */ + power = 1; + for (u = 1; u < iter->dset_dim_nbins; u++) { + if (iter->dset_dim_bins[u] > 0) { + HDprintf("\t\t# of datasets with dimension size %lu - %lu: %lu\n", power, + (power * 10) - 1, iter->dset_dim_bins[u]); + total += iter->dset_dim_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + HDprintf("\t\tTotal # of datasets: %lu\n", total); + } /* end if */ + + HDprintf("Dataset storage information:\n"); + HDfprintf(stdout, "\tTotal raw data size: %" PRIuHSIZE "\n", iter->dset_storage_size); + HDfprintf(stdout, "\tTotal external raw data size: %" PRIuHSIZE "\n", + iter->dset_external_storage_size); + + HDprintf("Dataset layout information:\n"); + for (u = 0; u < H5D_NLAYOUTS; u++) + HDprintf("\tDataset layout counts[%s]: %lu\n", + (u == H5D_COMPACT + ? "COMPACT" + : (u == H5D_CONTIGUOUS ? "CONTIG" : (u == H5D_CHUNKED ? "CHUNKED" : "VIRTUAL"))), + iter->dset_layouts[u]); + HDprintf("\tNumber of external files : %lu\n", iter->nexternal); + + HDprintf("Dataset filters information:\n"); + HDprintf("\tNumber of datasets with:\n"); + HDprintf("\t\tNO filter: %lu\n", iter->dset_comptype[H5Z_FILTER_ERROR + 1]); + HDprintf("\t\tGZIP filter: %lu\n", iter->dset_comptype[H5Z_FILTER_DEFLATE]); + HDprintf("\t\tSHUFFLE filter: %lu\n", iter->dset_comptype[H5Z_FILTER_SHUFFLE]); + HDprintf("\t\tFLETCHER32 filter: %lu\n", iter->dset_comptype[H5Z_FILTER_FLETCHER32]); + HDprintf("\t\tSZIP filter: %lu\n", iter->dset_comptype[H5Z_FILTER_SZIP]); + HDprintf("\t\tNBIT filter: %lu\n", iter->dset_comptype[H5Z_FILTER_NBIT]); + HDprintf("\t\tSCALEOFFSET filter: %lu\n", iter->dset_comptype[H5Z_FILTER_SCALEOFFSET]); + HDprintf("\t\tUSER-DEFINED filter: %lu\n", iter->dset_comptype[H5_NFILTERS_IMPL - 1]); + } /* end if */ + + return 0; +} /* print_dataset_info() */ + +/*------------------------------------------------------------------------- + * Function: print_dataset_metadata + * + * Purpose: Prints file space information for datasets' metadata + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Vailin Choi; October 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_dset_metadata(const iter_t *iter) +{ + HDprintf("File space information for datasets' metadata (in bytes):\n"); + + HDfprintf(stdout, "\tObject headers (total/unused): %" PRIuHSIZE "/%" PRIuHSIZE "\n", + iter->dset_ohdr_info.total_size, iter->dset_ohdr_info.free_size); + + HDfprintf(stdout, "\tIndex for Chunked datasets: %" PRIuHSIZE "\n", iter->datasets_index_storage_size); + HDfprintf(stdout, "\tHeap: %" PRIuHSIZE "\n", iter->datasets_heap_storage_size); + + return 0; +} /* print_dset_metadata() */ + +/*------------------------------------------------------------------------- + * Function: print_dset_dtype_meta + * + * Purpose: Prints datasets' datatype information + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Vailin Choi; October 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_dset_dtype_meta(const iter_t *iter) +{ + unsigned long total; /* Total count for various statistics */ + size_t dtype_size; /* Size of encoded datatype */ + unsigned u; /* Local index variable */ + + if (iter->dset_ntypes) { + HDprintf("Dataset datatype information:\n"); + HDprintf("\t# of unique datatypes used by datasets: %lu\n", iter->dset_ntypes); + total = 0; + for (u = 0; u < iter->dset_ntypes; u++) { + H5Tencode(iter->dset_type_info[u].tid, NULL, &dtype_size); + HDprintf("\tDataset datatype #%u:\n", u); + HDprintf("\t\tCount (total/named) = (%lu/%lu)\n", iter->dset_type_info[u].count, + iter->dset_type_info[u].named); + HDprintf("\t\tSize (desc./elmt) = (%lu/%lu)\n", (unsigned long)dtype_size, + (unsigned long)H5Tget_size(iter->dset_type_info[u].tid)); + H5Tclose(iter->dset_type_info[u].tid); + total += iter->dset_type_info[u].count; + } /* end for */ + HDprintf("\tTotal dataset datatype count: %lu\n", total); + } /* end if */ + + return 0; +} /* print_dset_dtype_meta() */ + +/*------------------------------------------------------------------------- + * Function: print_attr_info + * + * Purpose: Prints information about attributes in the file + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Vailin Choi + * July 12, 2007 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_attr_info(const iter_t *iter) +{ + unsigned long power; /* Temporary "power" for bins */ + unsigned long total; /* Total count for various statistics */ + unsigned u; /* Local index variable */ + + HDprintf("Small # of attributes (objects with 1 to %u attributes):\n", sattrs_threshold); + total = 0; + for (u = 1; u <= (unsigned)sattrs_threshold; u++) { + if (iter->num_small_attrs[u] > 0) { + HDprintf("\t# of objects with %u attributes: %lu\n", u, iter->num_small_attrs[u]); + total += iter->num_small_attrs[u]; + } /* end if */ + } /* end for */ + HDprintf("\tTotal # of objects with small # of attributes: %lu\n", total); + + HDprintf("Attribute bins:\n"); + total = 0; + power = 1; + for (u = 1; u < iter->attr_nbins; u++) { + if (iter->attr_bins[u] > 0) { + HDprintf("\t# of objects with %lu - %lu attributes: %lu\n", power, (power * 10) - 1, + iter->attr_bins[u]); + total += iter->attr_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + HDprintf("\tTotal # of objects with attributes: %lu\n", total); + HDprintf("\tMax. # of attributes to objects: %lu\n", (unsigned long)iter->max_attrs); + + return 0; +} /* print_attr_info() */ + +/*------------------------------------------------------------------------- + * Function: print_freespace_info + * + * Purpose: Prints information about free space in the file + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Vailin Choi; July 7th, 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_freespace_info(const iter_t *iter) +{ + unsigned long power; /* Temporary "power" for bins */ + unsigned long total; /* Total count for various statistics */ + unsigned u; /* Local index variable */ + + HDfprintf(stdout, "Free-space persist: %s\n", iter->fs_persist ? "TRUE" : "FALSE"); + HDfprintf(stdout, "Free-space section threshold: %" PRIuHSIZE " bytes\n", iter->fs_threshold); + HDprintf("Small size free-space sections (< %u bytes):\n", (unsigned)SIZE_SMALL_SECTS); + total = 0; + for (u = 0; u < SIZE_SMALL_SECTS; u++) { + if (iter->num_small_sects[u] > 0) { + HDprintf("\t# of sections of size %u: %lu\n", u, iter->num_small_sects[u]); + total += iter->num_small_sects[u]; + } /* end if */ + } /* end for */ + HDprintf("\tTotal # of small size sections: %lu\n", total); + + HDprintf("Free-space section bins:\n"); + + total = 0; + power = 1; + for (u = 1; u < iter->sect_nbins; u++) { + if (iter->sect_bins[u] > 0) { + HDprintf("\t# of sections of size %lu - %lu: %lu\n", power, (power * 10) - 1, iter->sect_bins[u]); + total += iter->sect_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + HDprintf("\tTotal # of sections: %lu\n", total); + + return 0; +} /* print_freespace_info() */ + +/*------------------------------------------------------------------------- + * Function: print_storage_summary + * + * Purpose: Prints file space information for the file + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Vailin Choi; August 2009 + * + *------------------------------------------------------------------------- + */ +static herr_t +print_storage_summary(const iter_t *iter) +{ + hsize_t total_meta = 0; + hsize_t unaccount = 0; + double percent = 0.0; + + HDfprintf(stdout, "File space management strategy: %s\n", FS_STRATEGY_NAME[iter->fs_strategy]); + HDfprintf(stdout, "File space page size: %" PRIuHSIZE " bytes\n", iter->fsp_size); + HDprintf("Summary of file space information:\n"); + total_meta = + iter->super_size + iter->super_ext_size + iter->ublk_size + iter->group_ohdr_info.total_size + + iter->dset_ohdr_info.total_size + iter->dtype_ohdr_info.total_size + iter->groups_btree_storage_size + + iter->groups_heap_storage_size + iter->attrs_btree_storage_size + iter->attrs_heap_storage_size + + iter->datasets_index_storage_size + iter->datasets_heap_storage_size + iter->SM_hdr_storage_size + + iter->SM_index_storage_size + iter->SM_heap_storage_size + iter->free_hdr; + + HDfprintf(stdout, " File metadata: %" PRIuHSIZE " bytes\n", total_meta); + HDfprintf(stdout, " Raw data: %" PRIuHSIZE " bytes\n", iter->dset_storage_size); + + percent = ((double)iter->free_space / (double)iter->filesize) * 100.0; + HDfprintf(stdout, " Amount/Percent of tracked free space: %" PRIuHSIZE " bytes/%3.1f%%\n", + iter->free_space, percent); + + if (iter->filesize < (total_meta + iter->dset_storage_size + iter->free_space)) { + unaccount = (total_meta + iter->dset_storage_size + iter->free_space) - iter->filesize; + HDfprintf(stdout, " ??? File has %" PRIuHSIZE " more bytes accounted for than its size! ???\n", + unaccount); + } + else { + unaccount = iter->filesize - (total_meta + iter->dset_storage_size + iter->free_space); + HDfprintf(stdout, " Unaccounted space: %" PRIuHSIZE " bytes\n", unaccount); + } + + HDfprintf(stdout, "Total space: %" PRIuHSIZE " bytes\n", + total_meta + iter->dset_storage_size + iter->free_space + unaccount); + + if (iter->nexternal) + HDfprintf(stdout, "External raw data: %" PRIuHSIZE " bytes\n", iter->dset_external_storage_size); + + return 0; +} /* print_storage_summary() */ + +/*------------------------------------------------------------------------- + * Function: print_file_statistics + * + * Purpose: Prints file statistics + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Saturday, August 12, 2006 + * + *------------------------------------------------------------------------- + */ +static void +print_file_statistics(const iter_t *iter) +{ + if (display_all) { + display_file = TRUE; + display_group = TRUE; + display_dset = TRUE; + display_dset_dtype_meta = TRUE; + display_attr = TRUE; + display_free_sections = TRUE; + display_summary = TRUE; + + display_file_metadata = TRUE; + display_group_metadata = TRUE; + display_dset_metadata = TRUE; + } + + if (display_file) + print_file_info(iter); + if (display_file_metadata) + print_file_metadata(iter); + + if (display_group) + print_group_info(iter); + if (!display_all && display_group_metadata) + print_group_metadata(iter); + + if (display_dset) + print_dataset_info(iter); + if (display_dset_dtype_meta) + print_dset_dtype_meta(iter); + if (!display_all && display_dset_metadata) + print_dset_metadata(iter); + + if (display_attr) + print_attr_info(iter); + if (display_free_sections) + print_freespace_info(iter); + if (display_summary) + print_storage_summary(iter); +} /* print_file_statistics() */ + +/*------------------------------------------------------------------------- + * Function: print_object_statistics + * + * Purpose: Prints object statistics + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Thursday, August 17, 2006 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +print_object_statistics(const char *name) +{ + HDprintf("Object name %s\n", name); +} /* print_object_statistics() */ + +/*------------------------------------------------------------------------- + * Function: print_statistics + * + * Purpose: Prints statistics + * + * Return: Success: 0 + * + * Failure: Never fails + * + * Programmer: Elena Pourmal + * Thursday, August 17, 2006 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +print_statistics(const char *name, const iter_t *iter) +{ + if (display_object) + print_object_statistics(name); + else + print_file_statistics(iter); +} /* print_statistics() */ + +/*------------------------------------------------------------------------- + * Function: main + * + * Modifications: + * 2/2010; Vailin Choi + * Get the size of user block + * + *------------------------------------------------------------------------- + */ +int +main(int argc, char *argv[]) +{ + iter_t iter; + const char *fname = NULL; + hid_t fid = H5I_INVALID_HID; + struct handler_t *hand = NULL; + hid_t fapl_id = H5P_DEFAULT; + + h5tools_setprogname(PROGRAMNAME); + h5tools_setstatus(EXIT_SUCCESS); + + /* Initialize h5tools lib */ + h5tools_init(); + + HDmemset(&iter, 0, sizeof(iter)); + + if (parse_command_line(argc, (const char *const *)argv, &hand) < 0) + goto done; + + /* enable error reporting if command line option */ + h5tools_error_report(); + + if (drivername) { + h5tools_vfd_info_t vfd_info; + + vfd_info.type = VFD_BY_NAME; + vfd_info.info = NULL; + vfd_info.u.name = drivername; + +#ifdef H5_HAVE_ROS3_VFD + if (!HDstrcmp(drivername, drivernames[ROS3_VFD_IDX])) + vfd_info.info = &ros3_fa; +#endif +#ifdef H5_HAVE_LIBHDFS + if (!HDstrcmp(drivername, drivernames[HDFS_VFD_IDX])) + vfd_info.info = &hdfs_fa; +#endif + + if ((fapl_id = h5tools_get_fapl(H5P_DEFAULT, NULL, &vfd_info)) < 0) { + error_msg("Unable to create FAPL for file access\n"); + goto done; + } + } + + fname = argv[H5_optind]; + + /* Check for filename given */ + if (fname) { + hid_t fcpl; + H5F_info2_t finfo; + + HDprintf("Filename: %s\n", fname); + + fid = h5tools_fopen(fname, H5F_ACC_RDONLY, fapl_id, (fapl_id != H5P_DEFAULT), NULL, 0); + + if (fid < 0) { + error_msg("unable to open file \"%s\"\n", fname); + h5tools_setstatus(EXIT_FAILURE); + goto done; + } /* end if */ + + /* Initialize iter structure */ + iter.fid = fid; + + if (H5Fget_filesize(fid, &iter.filesize) < 0) + warn_msg("Unable to retrieve file size\n"); + HDassert(iter.filesize != 0); + + /* Get storage info for file-level structures */ + if (H5Fget_info2(fid, &finfo) < 0) + warn_msg("Unable to retrieve file info\n"); + else { + iter.super_size = finfo.super.super_size; + iter.super_ext_size = finfo.super.super_ext_size; + iter.SM_hdr_storage_size = finfo.sohm.hdr_size; + iter.SM_index_storage_size = finfo.sohm.msgs_info.index_size; + iter.SM_heap_storage_size = finfo.sohm.msgs_info.heap_size; + iter.free_space = finfo.free.tot_space; + iter.free_hdr = finfo.free.meta_size; + } /* end else */ + + iter.num_small_groups = (unsigned long *)HDcalloc((size_t)sgroups_threshold, sizeof(unsigned long)); + iter.num_small_attrs = + (unsigned long *)HDcalloc((size_t)(sattrs_threshold + 1), sizeof(unsigned long)); + iter.small_dset_dims = (unsigned long *)HDcalloc((size_t)sdsets_threshold, sizeof(unsigned long)); + + if (iter.num_small_groups == NULL || iter.num_small_attrs == NULL || iter.small_dset_dims == NULL) { + error_msg("Unable to allocate memory for tracking small groups/datasets/attributes\n"); + h5tools_setstatus(EXIT_FAILURE); + goto done; + } + + if ((fcpl = H5Fget_create_plist(fid)) < 0) + warn_msg("Unable to retrieve file creation property\n"); + + if (H5Pget_userblock(fcpl, &iter.ublk_size) < 0) + warn_msg("Unable to retrieve userblock size\n"); + + if (H5Pget_file_space_strategy(fcpl, &iter.fs_strategy, &iter.fs_persist, &iter.fs_threshold) < 0) + warn_msg("Unable to retrieve file space information\n"); + HDassert(iter.fs_strategy >= 0 && iter.fs_strategy < H5F_FSPACE_STRATEGY_NTYPES); + + if (H5Pget_file_space_page_size(fcpl, &iter.fsp_size) < 0) + warn_msg("Unable to retrieve file space page size\n"); + + /* get information for free-space sections */ + if (freespace_stats(fid, &iter) < 0) + warn_msg("Unable to retrieve freespace info\n"); + + /* Walk the objects or all file */ + if (display_object) { + unsigned u; + + for (u = 0; u < hand->obj_count; u++) { + if (h5trav_visit(fid, hand->obj[u], TRUE, TRUE, obj_stats, lnk_stats, &iter, H5O_INFO_ALL) < + 0) { + error_msg("unable to traverse object \"%s\"\n", hand->obj[u]); + h5tools_setstatus(EXIT_FAILURE); + } + else + print_statistics(hand->obj[u], &iter); + } /* end for */ + } /* end if */ + else { + if (h5trav_visit(fid, "/", TRUE, TRUE, obj_stats, lnk_stats, &iter, H5O_INFO_ALL) < 0) { + error_msg("unable to traverse objects/links in file \"%s\"\n", fname); + h5tools_setstatus(EXIT_FAILURE); + } + else + print_statistics("/", &iter); + } /* end else */ + } /* end if */ + +done: + hand_free(hand); + + /* Free iter structure */ + iter_free(&iter); + + if (fapl_id != H5P_DEFAULT) { + if (H5Pclose(fapl_id) < 0) { + error_msg("unable to close fapl entry\n"); + h5tools_setstatus(EXIT_FAILURE); + } + } + + if (fid >= 0 && H5Fclose(fid) < 0) { + error_msg("unable to close file \"%s\"\n", fname); + h5tools_setstatus(EXIT_FAILURE); + } /* end if */ + + leave(h5tools_getstatus()); +} /* end main() */ |
