From 054ca47350a07361e61d668453f62a0c657da70d Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 6 May 2011 17:02:24 -0500 Subject: [svn-r20767] Purpose: HDFFV-5928 - GMQS: h5diff problem and improvement on comparsing the same objects Description: Improved performance by eliminating duplicated action for getting object information in half from the previous fixe when comparing group vs group. This is addition to the previous commit r20676. Tested: jam (linux32-LE), koala (linux64-LE), heiwa (linuxppc64-BE), tejeda (mac32-LE), linew (solaris-BE), cmake --- release_docs/RELEASE.txt | 14 ++++-- tools/h5diff/ph5diff_main.c | 4 +- tools/lib/h5diff.c | 114 +++++++++++++++++++++++++++++--------------- tools/lib/h5diff.h | 12 ++++- tools/lib/h5trav.c | 12 +++++ tools/lib/h5trav.h | 3 ++ tools/lib/ph5diff.h | 4 +- 7 files changed, 116 insertions(+), 47 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index d14463b..fdfe110 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -525,13 +525,19 @@ Bug Fixes since HDF5-1.8.0 release Tools ----- + - Fixed h5diff to compare file itself correctly. Previously h5diff + reported either different or not compatible in certain cases even + comparing file itself. This fix also improve performance when + comparing same target objects through verifying the obj&file + addresses before comparing the details in the objects (ex: datasets + or attributes) Bug #HDFFV-5928 (XCAO & JKM 05/06/2011) - Updated h5dump test case script to prevent entire test failure upon source directory is read-only. Bug# HDFFV-4342 (JKM 2011/4/12) - Fixed h5dump displaying incorrect values for H5T_STD_I8BE type data in - attribute on Big-Endian machine. H5T_STD_I8BE is unsigned 8bit type, - so h5dump is supposed to display -2 instead of 254. It worked correctly - on Little-Endian system , but not on Big-Endian system. Bug #HDFFV-4358 - (JKM 04/08/2011) + attribute on Big-Endian machine. H5T_STD_I8BE is unsigned 8bit type, + so h5dump is supposed to display -2 instead of 254. It worked + correctly on Little-Endian system , but not on Big-Endian system. + Bug #HDFFV-4358 (JKM 04/08/2011) - Updated to unify option name to '--enable-error-stack' for printing HDF5 error stack messages for HDF5 tools. h5ls and h5dump for now. For h5ls, this replaces "-e/--errors" option, which is deprecated. diff --git a/tools/h5diff/ph5diff_main.c b/tools/h5diff/ph5diff_main.c index bac1086..871b93b 100644 --- a/tools/h5diff/ph5diff_main.c +++ b/tools/h5diff/ph5diff_main.c @@ -136,7 +136,7 @@ int main(int argc, const char *argv[]) static void ph5diff_worker(int nID) { - struct diff_args args; + struct diff_mpi_args args; hid_t file1_id, file2_id; char filenames[2][MAX_FILENAME]; char out_data[PRINT_DATA_MAX_SIZE] = {0}; @@ -177,7 +177,7 @@ ph5diff_worker(int nID) /*Recv parameters for diff from manager task */ MPI_Recv(&args, sizeof(args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status); /*Do the diff */ - diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), args.type); + diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), &(args.argdata)); diffs.not_cmp = args.options.not_cmp; /*If print buffer has something in it, request print token.*/ diff --git a/tools/lib/h5diff.c b/tools/lib/h5diff.c index 4f82906..bbf9b39 100644 --- a/tools/lib/h5diff.c +++ b/tools/lib/h5diff.c @@ -366,6 +366,7 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch int path2_offset = 0; int cmp; trav_table_t *table; + size_t idx; /* init */ trav_table_init( &table ); @@ -403,6 +404,14 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch infile[0] = 1; infile[1] = 1; trav_table_addflags(infile, path1_lp, info1->paths[curr1].type, table); + /* if the two point to the same target object, + * mark that in table */ + if (info1->paths[curr1].fileno == info2->paths[curr2].fileno && + info1->paths[curr1].objno == info2->paths[curr2].objno ) + { + idx = table->nobjs - 1; + table->objs[idx].is_same_trgobj = 1; + } } curr1++; curr2++; @@ -673,6 +682,8 @@ hsize_t h5diff(const char *fname1, if(!is_valid_options(options)) goto out; + options->cmn_objs = 1; /* eliminate warning */ + /*------------------------------------------------------------------------- * open the files first; if they are not valid, no point in continuing *------------------------------------------------------------------------- @@ -850,7 +861,17 @@ hsize_t h5diff(const char *fname1, HDstrcat(obj2fullname, "/"); } - options->cmn_objs = 1; /* eliminate warning */ + /* + * If verbose options is used, need to traverse thorugh the list of objects + * in the group to print out objects information. + * Use h5tools_is_obj_same() to improve performance by skipping + * comparing details of same objects. + */ + if(!(options->m_verbose || options->m_report)) + { + if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0) + goto out; + } /*--------------------------------------------- * check for following symlinks @@ -934,18 +955,6 @@ hsize_t h5diff(const char *fname1, { /* - * Use h5tools_is_obj_same() to improve performance by skipping - * comparing details of same objects. If verbose options is used, - * need to traverse thorugh the list of objects in the group and - * print out object information. Details of same objects will be - * skipped at diff() function. - */ - if(!(options->m_verbose || options->m_report)) { - if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0) - goto out; - } - - /* * traverse group1 */ trav_info_init(fname1, file1_id, &info1_grp); @@ -1092,6 +1101,8 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, char * grp2_path = ""; char * obj1_fullpath = NULL; char * obj2_fullpath = NULL; + h5trav_type_t objtype; + diff_args_t argdata; /* @@ -1153,7 +1164,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, int n; int busyTasks = 0; struct diffs_found nFoundbyWorker; - struct diff_args args; + struct diff_mpi_args args; int havePrintToken = 1; MPI_Status Status; @@ -1165,6 +1176,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, { if( table->objs[i].flags[0] && table->objs[i].flags[1]) { + objtype = table->objs[i].type; /* make full path for obj1 */ obj1_fullpath = (char*)HDcalloc (strlen(grp1_path) + strlen (table->objs[i].name) + 1, sizeof (char)); HDstrcpy(obj1_fullpath, grp1_path); @@ -1175,12 +1187,16 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, HDstrcpy(obj2_fullpath, grp2_path); HDstrcat(obj2_fullpath, table->objs[i].name); + /* Set argdata to pass other args into diff() */ + argdata.type = objtype; + argdata.is_same_trgobj = table->objs[i].is_same_trgobj; + options->cmn_objs = 1; if(!g_Parallel) { nfound += diff(file1_id, obj1_fullpath, file2_id, obj2_fullpath, - options, table->objs[i].type); + options, &argdata); } /* end if */ #ifdef H5_HAVE_PARALLEL else @@ -1204,10 +1220,12 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, MPI_Abort(MPI_COMM_WORLD, 0); } /* end if */ + /* set args struct to pass */ HDstrcpy(args.name1, obj1_fullpath); HDstrcpy(args.name2, obj2_fullpath); args.options = *options; - args.type = table->objs[i].type; + args.argdata.type = objtype; + args.argdata.is_same_trgobj = table->objs[i].is_same_trgobj; h5diffdebug2("busyTasks=%d\n", busyTasks); /* if there are any outstanding print requests, let's handle one. */ @@ -1500,6 +1518,7 @@ hsize_t diff_compare(hid_t file1_id, int is_dangle_link2 = 0; const char *obj1name = obj1_name; const char *obj2name = obj2_name; + diff_args_t argdata; /* local variables for diff() */ h5trav_type_t obj1type, obj2type; @@ -1707,9 +1726,13 @@ hsize_t diff_compare(hid_t file1_id, goto out; } + /* Set argdata to pass other args into diff() */ + argdata.type = obj1type; + argdata.is_same_trgobj = 0; + nfound = diff(file1_id, obj1name, file2_id, obj2name, - options, obj1type); + options, &argdata); out: /*------------------------------- @@ -1764,11 +1787,16 @@ out: * * Return: Number of differences found * - * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu - * Date: May 9, 2003 - * * Programmer: Jonathan Kim * - add following links feature (Feb 11,2010) + * - Change to use diff_args_t to pass the rest of args. + * Passing through it instead of individual args provides smoother + * extensibility through its members along with MPI code update for ph5diff + * as it doesn't require interface change. + * (May 6,2011) + * + * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu + * Date: May 9, 2003 *------------------------------------------------------------------------- */ @@ -1777,7 +1805,7 @@ hsize_t diff(hid_t file1_id, hid_t file2_id, const char *path2, diff_opt_t * options, - h5trav_type_t type) + diff_args_t *argdata) { hid_t type1_id = (-1); hid_t type2_id = (-1); @@ -1850,24 +1878,24 @@ hsize_t diff(hid_t file1_id, goto out2; /* - * Use h5tools_is_obj_same() to improve performance by skipping - * comparing details of same objects, - * - * check if two paths point to the same object for hard links or - * the option of follow link is used. h5tools_is_obj_same() is not - * needed for other cases. + * If both points to the same target object, skip comparing details inside + * of the objects to improve performance. + * Always check for the hard links, otherwise if follow symlink option is + * specified. + * + * Perform this to match the outputs as bypassing. */ - is_hard_link = (type==H5TRAV_TYPE_DATASET || - type==H5TRAV_TYPE_NAMED_DATATYPE || - type==H5TRAV_TYPE_GROUP); + is_hard_link = (argdata->type == H5TRAV_TYPE_DATASET || + argdata->type == H5TRAV_TYPE_NAMED_DATATYPE || + argdata->type == H5TRAV_TYPE_GROUP); if (options->follow_links || is_hard_link) { - if (h5tools_is_obj_same(file1_id, path1, file2_id, path2)!=0) + if (argdata->is_same_trgobj) { /* print information is only verbose option is used */ if(options->m_verbose || options->m_report) { - switch(type) + switch(argdata->type) { case H5TRAV_TYPE_DATASET: do_print_objname("dataset", path1, path2, options); @@ -1889,7 +1917,7 @@ hsize_t diff(hid_t file1_id, break; default: parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n", - path1, path2, get_type(type) ); + path1, path2, get_type(argdata->type) ); options->not_cmp = 1; break; } /* switch(type)*/ @@ -1898,10 +1926,10 @@ hsize_t diff(hid_t file1_id, } /* if(options->m_verbose || options->m_report) */ goto out2; - } /* h5tools_is_obj_same */ + } } - switch(type) + switch(argdata->type) { /*---------------------------------------------------------------------- * H5TRAV_TYPE_DATASET @@ -2031,10 +2059,15 @@ hsize_t diff(hid_t file1_id, goto out; } + /* Renew type in argdata to pass into diff(). + * For recursive call, argdata.is_same_trgobj is already + * set from initial call, so don't reset here */ + argdata->type = linkinfo1.trg_type; + /* call self to compare target object */ nfound += diff(file1_id, path1, file2_id, path2, - options, linkinfo1.trg_type); + options, argdata); } /* always print the number of differences found in verbose mode */ @@ -2083,9 +2116,14 @@ hsize_t diff(hid_t file1_id, goto out; } + /* Renew type in argdata to pass into diff(). + * For recursive call, argdata.is_same_trgobj is already + * set from initial call, so don't reset here */ + argdata->type = linkinfo1.trg_type; + nfound = diff(file1_id, path1, file2_id, path2, - options, linkinfo1.trg_type); + options, argdata); } } /* end if */ else @@ -2116,7 +2154,7 @@ hsize_t diff(hid_t file1_id, default: if(options->m_verbose) parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n", - path1, path2, get_type(type) ); + path1, path2, get_type(argdata->type) ); options->not_cmp = 1; break; } diff --git a/tools/lib/h5diff.h b/tools/lib/h5diff.h index 19d5ed5..66c5188 100644 --- a/tools/lib/h5diff.h +++ b/tools/lib/h5diff.h @@ -22,6 +22,16 @@ #define MAX_FILENAME 1024 /*------------------------------------------------------------------------- + * This is used to pass multiple args into diff(). + * Passing this instead of several each arg provides smoother extensibility + * through its members along with MPI code for ph5diff + * as it doesn't require interface change. + *------------------------------------------------------------------------*/ +typedef struct { + h5trav_type_t type; + hbool_t is_same_trgobj; +} diff_args_t; +/*------------------------------------------------------------------------- * command line options *------------------------------------------------------------------------- */ @@ -106,7 +116,7 @@ hsize_t diff( hid_t file1_id, hid_t file2_id, const char *path2, diff_opt_t *options, - h5trav_type_t type ); + diff_args_t *argdata); hsize_t diff_compare( hid_t file1_id, const char *file1_name, diff --git a/tools/lib/h5trav.c b/tools/lib/h5trav.c index 5195261..c3cfddf 100644 --- a/tools/lib/h5trav.c +++ b/tools/lib/h5trav.c @@ -307,6 +307,8 @@ trav_info_add(trav_info_t *info, const char *path, h5trav_type_t obj_type) idx = info->nused++; info->paths[idx].path = HDstrdup(path); info->paths[idx].type = obj_type; + info->paths[idx].fileno = 0; + info->paths[idx].objno = HADDR_UNDEF; } /* end trav_info_add() */ @@ -327,10 +329,18 @@ int trav_info_visit_obj(const char *path, const H5O_info_t *oinfo, const char UNUSED *already_visited, void *udata) { + size_t idx; + trav_info_t *info_p; /* Add the object to the 'info' struct */ /* (object types map directly to "traversal" types) */ trav_info_add((trav_info_t *)udata, path, (h5trav_type_t)oinfo->type); + /* set object addr and fileno. These are for checking same object */ + info_p = (trav_info_t *) udata; + idx = info_p->nused - 1; + info_p->paths[idx].objno = oinfo->addr; + info_p->paths[idx].fileno = oinfo->fileno; + return(0); } /* end trav_info_visit_obj() */ @@ -656,6 +666,7 @@ trav_table_add(trav_table_t *table, new = table->nobjs++; table->objs[new].objno = oinfo ? oinfo->addr : HADDR_UNDEF; table->objs[new].flags[0] = table->objs[new].flags[1] = 0; + table->objs[new].is_same_trgobj = 0; table->objs[new].name = (char *)HDstrdup(path); table->objs[new].type = oinfo ? (h5trav_type_t)oinfo->type : H5TRAV_TYPE_LINK; table->objs[new].nlinks = 0; @@ -739,6 +750,7 @@ void trav_table_addflags(unsigned *flags, table->objs[new].objno = 0; table->objs[new].flags[0] = flags[0]; table->objs[new].flags[1] = flags[1]; + table->objs[new].is_same_trgobj = 0; table->objs[new].name = (char *)HDstrdup(name); table->objs[new].type = type; table->objs[new].nlinks = 0; diff --git a/tools/lib/h5trav.h b/tools/lib/h5trav.h index 8eb93fa..da8dc69 100644 --- a/tools/lib/h5trav.h +++ b/tools/lib/h5trav.h @@ -65,6 +65,8 @@ typedef struct symlink_trav_t { typedef struct trav_path_t { char *path; h5trav_type_t type; + haddr_t objno; /* object address */ + unsigned long fileno; /* File number that object is located in */ } trav_path_t; typedef struct trav_info_t { @@ -95,6 +97,7 @@ typedef struct trav_link_t { typedef struct trav_obj_t { haddr_t objno; /* object address */ unsigned flags[2]; /* h5diff.object is present or not in both files*/ + hbool_t is_same_trgobj; /* same target object? no need to compare */ char *name; /* name */ h5trav_type_t type; /* type of object */ trav_link_t *links; /* array of possible link names */ diff --git a/tools/lib/ph5diff.h b/tools/lib/ph5diff.h index eb15f12..9476a71 100644 --- a/tools/lib/ph5diff.h +++ b/tools/lib/ph5diff.h @@ -36,12 +36,12 @@ H5TOOLS_DLLVAR char outBuff[]; H5TOOLS_DLLVAR int outBuffOffset; H5TOOLS_DLLVAR FILE * overflow_file; -struct diff_args +struct diff_mpi_args { char name1[256]; char name2[256]; - h5trav_type_t type; diff_opt_t options; + diff_args_t argdata; /* rest args */ }; struct diffs_found -- cgit v0.12