diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2005-09-17 16:14:32 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2005-09-17 16:14:32 (GMT) |
commit | 9a8933bb694c57a31e685c57e706a4144eb6b5a3 (patch) | |
tree | 9793c61462296632f42998954052e64236aaba2c /tools/misc/h5stat.c | |
parent | 9e8b7f29a08923e6c58692e8a6cabbeff08a1fc1 (diff) | |
download | hdf5-9a8933bb694c57a31e685c57e706a4144eb6b5a3.zip hdf5-9a8933bb694c57a31e685c57e706a4144eb6b5a3.tar.gz hdf5-9a8933bb694c57a31e685c57e706a4144eb6b5a3.tar.bz2 |
[svn-r11425] Purpose:
Add new tool
Description:
Add 'h5stat' tool to repo.
Platforms tested:
FreeBSD 4.11 (sleipnir)
Linux 2.4
Diffstat (limited to 'tools/misc/h5stat.c')
-rw-r--r-- | tools/misc/h5stat.c | 638 |
1 files changed, 638 insertions, 0 deletions
diff --git a/tools/misc/h5stat.c b/tools/misc/h5stat.c new file mode 100644 index 0000000..e626c8e --- /dev/null +++ b/tools/misc/h5stat.c @@ -0,0 +1,638 @@ +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include "hdf5.h" + +#define TRUE 1 +#define FALSE 0 +#define MAX(x,y) ((x) < (y) ? (y) : (x)) + +/* Parameters to control statistics gathered */ +#define SIZE_SMALL_GROUPS 10 +#define SIZE_SMALL_DSETS 10 + +/* Datatype statistics for datasets */ +typedef struct dtype_info_t { + hid_t tid; /* ID of datatype */ + unsigned long count; /* Number of types found */ + unsigned long named; /* Number of types that are named */ +} dtype_info_t; + +typedef struct ohdr_info_t { + unsigned long total_size; /* Total size of object headers */ + unsigned long free_size; /* Total free space in object headers */ +} ohdr_info_t; + +/* Info to pass to the iteration functions */ +typedef struct iter_t { + const char *container; /* Full name of the container object */ + unsigned long curr_depth; /* Current depth of hierarchy */ + + unsigned long uniq_groups; /* Number of unique groups */ + unsigned long uniq_dsets; /* Number of unique datasets */ + unsigned long uniq_types; /* Number of unique named datatypes */ + unsigned long uniq_links; /* Number of unique links */ + unsigned long uniq_others; /* Number of other unique objects */ + + unsigned long max_depth; /* Maximum depth of hierarchy */ + unsigned long max_links; /* Maximum # of links to an object */ + unsigned long max_fanout; /* Maximum fanout from a group */ + unsigned long num_small_groups[SIZE_SMALL_GROUPS]; /* Size of small groups tracked */ + unsigned group_nbins; /* Number of bins for group counts */ + unsigned long *group_bins; /* Pointer to array of bins for group counts */ + ohdr_info_t group_ohdr_info; /* Object header information for groups */ + + unsigned long max_dset_rank; /* Maximum rank of dataset */ + unsigned long dset_rank_count[H5S_MAX_RANK]; /* Number of datasets of each rank */ + unsigned long max_dset_dims; /* Maximum dimension size of dataset */ + unsigned long small_dset_dims[SIZE_SMALL_DSETS]; /* Size of dimensions of small datasets tracked */ + unsigned long dset_layouts[H5D_NLAYOUTS]; /* Type of storage for each dataset */ + unsigned long dset_ntypes; /* Number of diff. dataset datatypes found */ + dtype_info_t *dset_type_info; /* Pointer to dataset datatype information found */ + unsigned dset_dim_nbins; /* Number of bins for dataset dimensions */ + unsigned long *dset_dim_bins; /* Pointer to array of bins for dataset dimensions */ + ohdr_info_t dset_ohdr_info; /* Object header information for datasets */ + unsigned long dset_storage_size; /* Size of raw data for datasets */ +} iter_t; + +/* Table containing object id and object name */ +static struct { + int nalloc; /* number of slots allocated */ + int nobjs; /* number of objects */ + struct { + haddr_t id; /* object number */ + char *name; /* full object name */ + } *obj; +} idtab_g; + + +/*------------------------------------------------------------------------- + * Function: ceil_log10 + * + * Purpose: Compute the ceiling of log_10(x) + * + * Return: >0 on success, 0 on failure + * + * Programmer: Quincey Koziol + * Monday, August 22, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static unsigned +ceil_log10(unsigned long x) +{ + unsigned long pow = 1; + unsigned ret = 0; + + while(x >= pow) { + pow *= 10; + ret++; + } /* end while */ + + return(ret); +} + + +/*------------------------------------------------------------------------- + * Function: sym_insert + * + * Purpose: Add a symbol to the table. + * + * Return: void + * + * Programmer: Robb Matzke + * Thursday, January 21, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void +sym_insert(H5G_stat_t *sb, const char *name) +{ + int n; + + /* Don't add it if the link count is 1 because such an object can only + * have one name. */ + if (sb->u.obj.nlink<2) return; + + /* Extend the table */ + if (idtab_g.nobjs>=idtab_g.nalloc) { + idtab_g.nalloc = MAX(256, 2*idtab_g.nalloc); + idtab_g.obj = realloc(idtab_g.obj, + idtab_g.nalloc*sizeof(idtab_g.obj[0])); + } + + /* Insert the entry */ + n = idtab_g.nobjs++; + idtab_g.obj[n].id = sb->u.obj.objno; + idtab_g.obj[n].name = strdup(name); +} + + +/*------------------------------------------------------------------------- + * Function: sym_lookup + * + * Purpose: Find another name for the specified object. + * + * Return: Success: Ptr to another name. + * + * Failure: NULL + * + * Programmer: Robb Matzke + * Thursday, January 21, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static char * +sym_lookup(H5G_stat_t *sb) +{ + int n; + + if (sb->u.obj.nlink<2) + return NULL; /*only one name possible*/ + for (n=0; n<idtab_g.nobjs; n++) { + if (idtab_g.obj[n].id==sb->u.obj.objno) + return idtab_g.obj[n].name; + } + return NULL; +} + + +/*------------------------------------------------------------------------- + * Function: fix_name + * + * Purpose: Returns a malloc'd buffer that contains the PATH and BASE + * names separated by a single slash. It also removes duplicate + * and trailing slashes. + * + * Return: Success: Ptr to fixed name from malloc() + * + * Failure: NULL + * + * Programmer: Robb Matzke + * Thursday, January 21, 1999 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static char * +fix_name(const char *path, const char *base) +{ + size_t n = (path ? strlen(path) : 0) + (base ? strlen(base) : 0) + 3; + char *s = malloc(n), prev='\0'; + size_t len = 0; + + if (path) { + /* Path, followed by slash */ + for (/*void*/; *path; path++) + if ('/'!=*path || '/'!=prev) + prev = s[len++] = *path; + if ('/' != prev) + prev = s[len++] = '/'; + } + + if (base) { + /* Base name w/o trailing slashes */ + const char *end = base + strlen(base); + while (end > base && '/' == end[-1]) + --end; + + for (/*void*/; base < end; base++) + if ('/' != *base || '/' != prev) + prev = s[len++] = *base; + } + + s[len] = '\0'; + return s; +} + + +/*------------------------------------------------------------------------- + * Function: walk + * + * Purpose: Gather statistics about the file + * + * Return: Success: 0 + * + * Failure: -1 + * + * Programmer: Quincey Koziol + * Tuesday, August 16, 2005 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +walk (hid_t group, const char *name, void *_iter) +{ + char *fullname = NULL; + char *s; + H5G_stat_t sb; + iter_t *iter = (iter_t*)_iter; + herr_t ret; /* Generic return value */ + + /* Get the full object name */ + fullname = fix_name(iter->container, name); +/* +printf("walk: fullname = %s\n", fullname); +*/ + + /* Get object information */ + ret = H5Gget_objinfo(group, name, FALSE, &sb); + assert(ret >= 0); + + /* If the object has already been printed then just show the object ID + * and return. */ + if ((s=sym_lookup(&sb))) { + printf("same as %s", s); + } else { + sym_insert(&sb, fullname); + + /* Gather some statistics about the object */ + if(sb.u.obj.nlink > iter->max_links) + iter->max_links = sb.u.obj.nlink; + + switch(sb.type) { + case H5G_GROUP: + { + hid_t gid; /* Group ID */ + const char *last_container; + hsize_t num_objs; + unsigned bin; /* "bin" the number of objects falls in */ + + /* Gather statistics about this type of object */ + iter->uniq_groups++; + if(iter->curr_depth > iter->max_depth) + iter->max_depth = iter->curr_depth; + + /* Get object header information */ + iter->group_ohdr_info.total_size += sb.u.obj.ohdr.size; + iter->group_ohdr_info.free_size += sb.u.obj.ohdr.free; + + gid = H5Gopen(group, name); + assert(gid > 0); + + H5Gget_num_objs(gid, &num_objs); + if(num_objs < SIZE_SMALL_GROUPS) + (iter->num_small_groups[num_objs])++; + if(num_objs > iter->max_fanout) + iter->max_fanout = num_objs; + + /* Add group count to proper bin */ + bin = ceil_log10((unsigned long)num_objs); + if((bin + 1) > iter->group_nbins) { + /* Allocate more storage for info about dataset's datatype */ + iter->group_bins = realloc(iter->group_bins, (bin + 1) * sizeof(unsigned long)); + assert(iter->group_bins); + + /* Initialize counts for intermediate bins */ + while(iter->group_nbins < bin) + iter->group_bins[iter->group_nbins++] = 0; + iter->group_nbins++; + + /* Initialize count for new bin */ + iter->group_bins[bin] = 1; + } /* end if */ + else { + (iter->group_bins[bin])++; + } /* end else */ + + ret = H5Gclose(gid); + assert(ret >= 0); + + last_container = iter->container; + iter->container = fullname; + iter->curr_depth++; + + H5Giterate(group, name, NULL, walk, iter); + + iter->container = last_container; + iter->curr_depth--; + } /* end case */ + break; + + case H5G_DATASET: + { + hid_t did; /* Dataset ID */ + hid_t sid; /* Dataspace ID */ + hid_t tid; /* Datatype ID */ + hid_t dcpl; /* Dataset creation property list ID */ + hsize_t dims[H5S_MAX_RANK]; /* Dimensions of dataset */ + H5D_layout_t lout; /* Layout of dataset */ + unsigned type_found; /* Whether the dataset's datatype was already found */ + int ndims; /* Number of dimensions of dataset */ + hsize_t storage; /* Size of dataset storage */ + unsigned u; /* Local index variable */ + + /* Gather statistics about this type of object */ + iter->uniq_dsets++; + + /* Get object header information */ + iter->dset_ohdr_info.total_size += sb.u.obj.ohdr.size; + iter->dset_ohdr_info.free_size += sb.u.obj.ohdr.free; + + did = H5Dopen(group, name); + assert(did > 0); + + /* Get storage info */ + storage = H5Dget_storage_size(did); + iter->dset_storage_size += storage; + + /* Gather dataspace statistics */ + sid = H5Dget_space(did); + assert(sid > 0); + + ndims = H5Sget_simple_extent_dims(sid, dims, NULL); + assert(ndims >= 0); + + /* Check for larger rank of dataset */ + if((unsigned)ndims > iter->max_dset_rank) + iter->max_dset_rank = ndims; + + /* Track the number of datasets with each rank */ + (iter->dset_rank_count[ndims])++; + + /* Only gather dim size statistics on 1-D datasets */ + if(ndims == 1) { + unsigned bin; /* "bin" the number of objects falls in */ + + if(dims[0] > iter->max_dset_dims) + iter->max_dset_dims = dims[0]; + if(dims[0] < SIZE_SMALL_DSETS) + (iter->small_dset_dims[dims[0]])++; + + /* Add group count to proper bin */ + bin = ceil_log10((unsigned long)dims[0]); + if((bin + 1) > iter->dset_dim_nbins) { + /* Allocate more storage for info about dataset's datatype */ + iter->dset_dim_bins = realloc(iter->dset_dim_bins, (bin + 1) * sizeof(unsigned long)); + assert(iter->dset_dim_bins); + + /* Initialize counts for intermediate bins */ + while(iter->dset_dim_nbins < bin) + iter->dset_dim_bins[iter->dset_dim_nbins++] = 0; + iter->dset_dim_nbins++; + + /* Initialize count for this bin */ + iter->dset_dim_bins[bin] = 1; + } /* end if */ + else { + (iter->dset_dim_bins[bin])++; + } /* end else */ + } /* end if */ + + ret = H5Sclose(sid); + assert(ret >= 0); + + /* Gather datatype statistics */ + tid = H5Dget_type(did); + assert(tid > 0); + + type_found = FALSE; + for(u = 0; u < iter->dset_ntypes; u++) + if(H5Tequal(iter->dset_type_info[u].tid, tid) > 0) { + type_found = TRUE; + break; + } /* end for */ + if(type_found) { + (iter->dset_type_info[u].count)++; + } /* end if */ + else { + unsigned curr_ntype = iter->dset_ntypes; + + /* Increment # of datatypes seen for datasets */ + iter->dset_ntypes++; + + /* Allocate more storage for info about dataset's datatype */ + iter->dset_type_info = realloc(iter->dset_type_info, iter->dset_ntypes * sizeof(dtype_info_t)); + assert(iter->dset_type_info); + + /* Initialize information about datatype */ + iter->dset_type_info[curr_ntype].tid = H5Tcopy(tid); + assert(iter->dset_type_info[curr_ntype].tid > 0); + iter->dset_type_info[curr_ntype].count = 1; + iter->dset_type_info[curr_ntype].named = 0; + + /* Set index for later */ + u = curr_ntype; + } /* end else */ + + /* Check if the datatype is a named datatype */ + if(H5Tcommitted(tid) > 0) + (iter->dset_type_info[u].named)++; + + ret = H5Tclose(tid); + assert(ret >= 0); + + /* Gather layout statistics */ + dcpl = H5Dget_create_plist(did); + assert(dcpl > 0); + + lout = H5Pget_layout(dcpl); + assert(lout > 0); + + /* Track the layout type for dataset */ + (iter->dset_layouts[lout])++; + + ret = H5Pclose(dcpl); + assert(ret >= 0); + + ret = H5Dclose(did); + assert(ret >= 0); + } /* end case */ + break; + + case H5G_TYPE: + /* Gather statistics about this type of object */ + iter->uniq_types++; + break; + + case H5G_LINK: + /* Gather statistics about this type of object */ + iter->uniq_links++; + break; + + default: + /* Gather statistics about this type of object */ + iter->uniq_others++; + break; + } /* end switch */ + } + + if (fullname) + free(fullname); + + return 0; +} + +static void +usage(void) +{ + printf("usage: h5stat <filename>\n"); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + iter_t iter; + hid_t fid; + unsigned long power; /* Temporary "power" for bins */ + size_t dtype_size; /* Size of encoded datatype */ + unsigned long total; /* Total count for various statistics */ + unsigned u; /* Local index variable */ + + if(argc != 2) + usage(); + + printf("Filename: %s\n", argv[1]); + + if((fid = H5Fopen(argv[1], H5F_ACC_RDONLY, H5P_DEFAULT)) > 0) { + iter.container = "/"; + iter.uniq_groups = 0; + iter.uniq_dsets = 0; + iter.uniq_types = 0; + iter.uniq_links = 0; + iter.uniq_others = 0; + iter.curr_depth = 0; + iter.max_depth = 0; + iter.max_links = 0; + iter.max_fanout = 0; + for(u = 0; u < SIZE_SMALL_GROUPS; u++) + iter.num_small_groups[u] = 0; + iter.group_nbins = 0; + iter.group_bins = NULL; + iter.group_ohdr_info.total_size = 0; + iter.group_ohdr_info.free_size = 0; + iter.max_dset_rank = 0; + for(u = 0; u < H5S_MAX_RANK; u++) + iter.dset_rank_count[u] = 0; + iter.max_dset_dims = 0; + for(u = 0; u < SIZE_SMALL_DSETS; u++) + iter.small_dset_dims[u] = 0; + for(u = 0; u < H5D_NLAYOUTS; u++) + iter.dset_layouts[u] = 0; + iter.dset_ntypes = 0; + iter.dset_type_info = NULL; + iter.dset_dim_nbins = 0; + iter.dset_dim_bins = NULL; + iter.dset_ohdr_info.total_size = 0; + iter.dset_ohdr_info.free_size = 0; + iter.dset_storage_size = 0; + walk(fid, "/", &iter); + H5Fclose(fid); + } /* end if */ + + /* Print information about the file */ + printf("File Hierarchy:\n"); + printf("\t# of unique groups: %lu\n", iter.uniq_groups); + printf("\t# of unique datasets: %lu\n", iter.uniq_dsets); + printf("\t# of unique named dataypes: %lu\n", iter.uniq_types); + printf("\t# of unique links: %lu\n", iter.uniq_links); + printf("\t# of unique other: %lu\n", iter.uniq_others); + printf("\tMax. # of links to object: %lu\n", iter.max_links); + printf("\tMax. depth of hierarchy: %lu\n", iter.max_depth); + printf("\tMax. # of objects in group: %lu\n", iter.max_fanout); + + printf("Object header size: (total/unused)\n"); + printf("\tGroups: %lu/%lu\n", iter.group_ohdr_info.total_size,iter.group_ohdr_info.free_size); + printf("\tDatasets: %lu/%lu\n", iter.dset_ohdr_info.total_size,iter.dset_ohdr_info.free_size); + + printf("Small groups:\n"); + total = 0; + for(u = 0; u < SIZE_SMALL_GROUPS; u++) { + if(iter.num_small_groups[u] > 0) { + printf("\t# of groups of size %u: %lu\n", u, iter.num_small_groups[u]); + total += iter.num_small_groups[u]; + } /* end if */ + } /* end for */ + printf("\tTotal # of small groups: %lu\n", total); + + printf("Group bins:\n"); + total = 0; + if(iter.group_bins[0] > 0) { + printf("\t# of groups of size 0: %lu\n", iter.group_bins[0]); + total = iter.group_bins[0]; + } /* end if */ + power = 1; + for(u = 1; u < iter.group_nbins; u++) { + if(iter.group_bins[u] > 0) { + printf("\t# of groups of size %lu - %lu: %lu\n", power, (power * 10) - 1, iter.group_bins[u]); + total += iter.group_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + printf("\tTotal # of groups: %lu\n", total); + + + if(iter.uniq_dsets > 0) { + printf("Dataset dimension info:\n"); + printf("\tMax. rank of datasets: %lu\n", iter.max_dset_rank); + printf("\tDataset ranks:\n"); + for(u = 0; u < H5S_MAX_RANK; u++) + if(iter.dset_rank_count[u] > 0) + printf("\t\t# of dataset with rank %u: %lu\n", u, iter.dset_rank_count[u]); + + printf("1-D Dataset info:\n"); + printf("\tMax. dimension size of 1-D datasets: %lu\n", iter.max_dset_dims); + printf("\tSmall 1-D datasets:\n"); + total = 0; + for(u = 0; u < SIZE_SMALL_DSETS; u++) { + if(iter.small_dset_dims[u] > 0) { + printf("\t\t# of dataset dimensions of size %u: %lu\n", u, iter.small_dset_dims[u]); + total += iter.small_dset_dims[u]; + } /* end if */ + } /* end for */ + printf("\t\tTotal small datasets: %lu\n", total); + + /* Protect against no datasets in file */ + if(iter.dset_dim_nbins > 0) { + printf("\t1-D Dataset dimension bins:\n"); + total = 0; + if(iter.dset_dim_bins[0] > 0) { + printf("\t\t# of datasets of size 0: %lu\n", iter.dset_dim_bins[0]); + total = iter.dset_dim_bins[0]; + } /* end if */ + power = 1; + for(u = 1; u < iter.dset_dim_nbins; u++) { + if(iter.dset_dim_bins[u] > 0) { + printf("\t\t# of datasets of size %lu - %lu: %lu\n", power, (power * 10) - 1, iter.dset_dim_bins[u]); + total += iter.dset_dim_bins[u]; + } /* end if */ + power *= 10; + } /* end for */ + printf("\t\tTotal # of datasets: %lu\n", total); + } /* end if */ + + printf("Dataset storage info:\n"); + printf("\tTotal raw data size: %lu\n", iter.dset_storage_size); + + printf("Dataset layout info:\n"); + for(u = 0; u < H5D_NLAYOUTS; u++) + printf("\tDataset layout counts[%s]: %lu\n", (u == 0 ? "COMPACT" : + (u == 1 ? "CONTIG" : "CHUNKED")), iter.dset_layouts[u]); + + printf("Dataset datatype info:\n"); + printf("\t# of unique datatypes used by datasets: %lu\n", iter.dset_ntypes); + total = 0; + for(u = 0; u < iter.dset_ntypes; u++) { + H5Tencode(iter.dset_type_info[u].tid, NULL, &dtype_size); + printf("\tDataset datatype #%u:\n", u); + printf("\t\tCount (total/named) = (%lu/%lu)\n", iter.dset_type_info[u].count, iter.dset_type_info[u].named); + printf("\t\tSize (desc./elmt) = (%lu/%lu)\n", (unsigned long)dtype_size, (unsigned long)H5Tget_size(iter.dset_type_info[u].tid)); + H5Tclose(iter.dset_type_info[u].tid); + total += iter.dset_type_info[u].count; + } /* end for */ + printf("\tTotal dataset datatype count: %lu\n", total); + } /* end if */ + + return (0); +} + |