From a396f8dd7f882c1eddab91e1b7fa3c02bec5e06e Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 10 May 2011 18:05:28 -0500 Subject: [svn-r20794] Purpose: HDFFV-5928 - GMQS: h5diff problem and improvement on comparsing the same objects Description: Merged from HDF5 trunk r20767. Improved performance by eliminating duplicated action for getting object information in half from the previous fixe when comparing group vs group. This is addition to the previous commit r20706. Tested: jam (linux32-LE), koala (linux64-LE), heiwa (linuxppc64-BE), tejeda (mac32-LE) --- release_docs/RELEASE.txt | 7 ++- tools/h5diff/ph5diff_main.c | 4 +- tools/lib/h5diff.c | 114 +++++++++++++++++++++++++++++--------------- tools/lib/h5diff.h | 12 ++++- tools/lib/h5trav.c | 12 +++++ tools/lib/h5trav.h | 3 ++ tools/lib/ph5diff.h | 4 +- 7 files changed, 112 insertions(+), 44 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index b6f6d86..be55c5f 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -105,7 +105,12 @@ Bug Fixes since HDF5-1.8.7 Tools ----- - - None + - Fixed h5diff to compare file itself correctly. Previously h5diff + reported either different or not compatible in certain cases even + comparing file itself. This fix also improve performance when + comparing same target objects through verifying the obj&file + addresses before comparing the details in the objects (ex: datasets + or attributes) Bug #HDFFV-5928 (XCAO & JKM 05/06/2011) F90 API ------ diff --git a/tools/h5diff/ph5diff_main.c b/tools/h5diff/ph5diff_main.c index bac1086..871b93b 100644 --- a/tools/h5diff/ph5diff_main.c +++ b/tools/h5diff/ph5diff_main.c @@ -136,7 +136,7 @@ int main(int argc, const char *argv[]) static void ph5diff_worker(int nID) { - struct diff_args args; + struct diff_mpi_args args; hid_t file1_id, file2_id; char filenames[2][MAX_FILENAME]; char out_data[PRINT_DATA_MAX_SIZE] = {0}; @@ -177,7 +177,7 @@ ph5diff_worker(int nID) /*Recv parameters for diff from manager task */ MPI_Recv(&args, sizeof(args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status); /*Do the diff */ - diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), args.type); + diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), &(args.argdata)); diffs.not_cmp = args.options.not_cmp; /*If print buffer has something in it, request print token.*/ diff --git a/tools/lib/h5diff.c b/tools/lib/h5diff.c index 4f82906..bbf9b39 100644 --- a/tools/lib/h5diff.c +++ b/tools/lib/h5diff.c @@ -366,6 +366,7 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch int path2_offset = 0; int cmp; trav_table_t *table; + size_t idx; /* init */ trav_table_init( &table ); @@ -403,6 +404,14 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch infile[0] = 1; infile[1] = 1; trav_table_addflags(infile, path1_lp, info1->paths[curr1].type, table); + /* if the two point to the same target object, + * mark that in table */ + if (info1->paths[curr1].fileno == info2->paths[curr2].fileno && + info1->paths[curr1].objno == info2->paths[curr2].objno ) + { + idx = table->nobjs - 1; + table->objs[idx].is_same_trgobj = 1; + } } curr1++; curr2++; @@ -673,6 +682,8 @@ hsize_t h5diff(const char *fname1, if(!is_valid_options(options)) goto out; + options->cmn_objs = 1; /* eliminate warning */ + /*------------------------------------------------------------------------- * open the files first; if they are not valid, no point in continuing *------------------------------------------------------------------------- @@ -850,7 +861,17 @@ hsize_t h5diff(const char *fname1, HDstrcat(obj2fullname, "/"); } - options->cmn_objs = 1; /* eliminate warning */ + /* + * If verbose options is used, need to traverse thorugh the list of objects + * in the group to print out objects information. + * Use h5tools_is_obj_same() to improve performance by skipping + * comparing details of same objects. + */ + if(!(options->m_verbose || options->m_report)) + { + if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0) + goto out; + } /*--------------------------------------------- * check for following symlinks @@ -934,18 +955,6 @@ hsize_t h5diff(const char *fname1, { /* - * Use h5tools_is_obj_same() to improve performance by skipping - * comparing details of same objects. If verbose options is used, - * need to traverse thorugh the list of objects in the group and - * print out object information. Details of same objects will be - * skipped at diff() function. - */ - if(!(options->m_verbose || options->m_report)) { - if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0) - goto out; - } - - /* * traverse group1 */ trav_info_init(fname1, file1_id, &info1_grp); @@ -1092,6 +1101,8 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, char * grp2_path = ""; char * obj1_fullpath = NULL; char * obj2_fullpath = NULL; + h5trav_type_t objtype; + diff_args_t argdata; /* @@ -1153,7 +1164,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, int n; int busyTasks = 0; struct diffs_found nFoundbyWorker; - struct diff_args args; + struct diff_mpi_args args; int havePrintToken = 1; MPI_Status Status; @@ -1165,6 +1176,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, { if( table->objs[i].flags[0] && table->objs[i].flags[1]) { + objtype = table->objs[i].type; /* make full path for obj1 */ obj1_fullpath = (char*)HDcalloc (strlen(grp1_path) + strlen (table->objs[i].name) + 1, sizeof (char)); HDstrcpy(obj1_fullpath, grp1_path); @@ -1175,12 +1187,16 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, HDstrcpy(obj2_fullpath, grp2_path); HDstrcat(obj2_fullpath, table->objs[i].name); + /* Set argdata to pass other args into diff() */ + argdata.type = objtype; + argdata.is_same_trgobj = table->objs[i].is_same_trgobj; + options->cmn_objs = 1; if(!g_Parallel) { nfound += diff(file1_id, obj1_fullpath, file2_id, obj2_fullpath, - options, table->objs[i].type); + options, &argdata); } /* end if */ #ifdef H5_HAVE_PARALLEL else @@ -1204,10 +1220,12 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, MPI_Abort(MPI_COMM_WORLD, 0); } /* end if */ + /* set args struct to pass */ HDstrcpy(args.name1, obj1_fullpath); HDstrcpy(args.name2, obj2_fullpath); args.options = *options; - args.type = table->objs[i].type; + args.argdata.type = objtype; + args.argdata.is_same_trgobj = table->objs[i].is_same_trgobj; h5diffdebug2("busyTasks=%d\n", busyTasks); /* if there are any outstanding print requests, let's handle one. */ @@ -1500,6 +1518,7 @@ hsize_t diff_compare(hid_t file1_id, int is_dangle_link2 = 0; const char *obj1name = obj1_name; const char *obj2name = obj2_name; + diff_args_t argdata; /* local variables for diff() */ h5trav_type_t obj1type, obj2type; @@ -1707,9 +1726,13 @@ hsize_t diff_compare(hid_t file1_id, goto out; } + /* Set argdata to pass other args into diff() */ + argdata.type = obj1type; + argdata.is_same_trgobj = 0; + nfound = diff(file1_id, obj1name, file2_id, obj2name, - options, obj1type); + options, &argdata); out: /*------------------------------- @@ -1764,11 +1787,16 @@ out: * * Return: Number of differences found * - * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu - * Date: May 9, 2003 - * * Programmer: Jonathan Kim * - add following links feature (Feb 11,2010) + * - Change to use diff_args_t to pass the rest of args. + * Passing through it instead of individual args provides smoother + * extensibility through its members along with MPI code update for ph5diff + * as it doesn't require interface change. + * (May 6,2011) + * + * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu + * Date: May 9, 2003 *------------------------------------------------------------------------- */ @@ -1777,7 +1805,7 @@ hsize_t diff(hid_t file1_id, hid_t file2_id, const char *path2, diff_opt_t * options, - h5trav_type_t type) + diff_args_t *argdata) { hid_t type1_id = (-1); hid_t type2_id = (-1); @@ -1850,24 +1878,24 @@ hsize_t diff(hid_t file1_id, goto out2; /* - * Use h5tools_is_obj_same() to improve performance by skipping - * comparing details of same objects, - * - * check if two paths point to the same object for hard links or - * the option of follow link is used. h5tools_is_obj_same() is not - * needed for other cases. + * If both points to the same target object, skip comparing details inside + * of the objects to improve performance. + * Always check for the hard links, otherwise if follow symlink option is + * specified. + * + * Perform this to match the outputs as bypassing. */ - is_hard_link = (type==H5TRAV_TYPE_DATASET || - type==H5TRAV_TYPE_NAMED_DATATYPE || - type==H5TRAV_TYPE_GROUP); + is_hard_link = (argdata->type == H5TRAV_TYPE_DATASET || + argdata->type == H5TRAV_TYPE_NAMED_DATATYPE || + argdata->type == H5TRAV_TYPE_GROUP); if (options->follow_links || is_hard_link) { - if (h5tools_is_obj_same(file1_id, path1, file2_id, path2)!=0) + if (argdata->is_same_trgobj) { /* print information is only verbose option is used */ if(options->m_verbose || options->m_report) { - switch(type) + switch(argdata->type) { case H5TRAV_TYPE_DATASET: do_print_objname("dataset", path1, path2, options); @@ -1889,7 +1917,7 @@ hsize_t diff(hid_t file1_id, break; default: parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n", - path1, path2, get_type(type) ); + path1, path2, get_type(argdata->type) ); options->not_cmp = 1; break; } /* switch(type)*/ @@ -1898,10 +1926,10 @@ hsize_t diff(hid_t file1_id, } /* if(options->m_verbose || options->m_report) */ goto out2; - } /* h5tools_is_obj_same */ + } } - switch(type) + switch(argdata->type) { /*---------------------------------------------------------------------- * H5TRAV_TYPE_DATASET @@ -2031,10 +2059,15 @@ hsize_t diff(hid_t file1_id, goto out; } + /* Renew type in argdata to pass into diff(). + * For recursive call, argdata.is_same_trgobj is already + * set from initial call, so don't reset here */ + argdata->type = linkinfo1.trg_type; + /* call self to compare target object */ nfound += diff(file1_id, path1, file2_id, path2, - options, linkinfo1.trg_type); + options, argdata); } /* always print the number of differences found in verbose mode */ @@ -2083,9 +2116,14 @@ hsize_t diff(hid_t file1_id, goto out; } + /* Renew type in argdata to pass into diff(). + * For recursive call, argdata.is_same_trgobj is already + * set from initial call, so don't reset here */ + argdata->type = linkinfo1.trg_type; + nfound = diff(file1_id, path1, file2_id, path2, - options, linkinfo1.trg_type); + options, argdata); } } /* end if */ else @@ -2116,7 +2154,7 @@ hsize_t diff(hid_t file1_id, default: if(options->m_verbose) parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n", - path1, path2, get_type(type) ); + path1, path2, get_type(argdata->type) ); options->not_cmp = 1; break; } diff --git a/tools/lib/h5diff.h b/tools/lib/h5diff.h index 19d5ed5..66c5188 100644 --- a/tools/lib/h5diff.h +++ b/tools/lib/h5diff.h @@ -22,6 +22,16 @@ #define MAX_FILENAME 1024 /*------------------------------------------------------------------------- + * This is used to pass multiple args into diff(). + * Passing this instead of several each arg provides smoother extensibility + * through its members along with MPI code for ph5diff + * as it doesn't require interface change. + *------------------------------------------------------------------------*/ +typedef struct { + h5trav_type_t type; + hbool_t is_same_trgobj; +} diff_args_t; +/*------------------------------------------------------------------------- * command line options *------------------------------------------------------------------------- */ @@ -106,7 +116,7 @@ hsize_t diff( hid_t file1_id, hid_t file2_id, const char *path2, diff_opt_t *options, - h5trav_type_t type ); + diff_args_t *argdata); hsize_t diff_compare( hid_t file1_id, const char *file1_name, diff --git a/tools/lib/h5trav.c b/tools/lib/h5trav.c index 5195261..c3cfddf 100644 --- a/tools/lib/h5trav.c +++ b/tools/lib/h5trav.c @@ -307,6 +307,8 @@ trav_info_add(trav_info_t *info, const char *path, h5trav_type_t obj_type) idx = info->nused++; info->paths[idx].path = HDstrdup(path); info->paths[idx].type = obj_type; + info->paths[idx].fileno = 0; + info->paths[idx].objno = HADDR_UNDEF; } /* end trav_info_add() */ @@ -327,10 +329,18 @@ int trav_info_visit_obj(const char *path, const H5O_info_t *oinfo, const char UNUSED *already_visited, void *udata) { + size_t idx; + trav_info_t *info_p; /* Add the object to the 'info' struct */ /* (object types map directly to "traversal" types) */ trav_info_add((trav_info_t *)udata, path, (h5trav_type_t)oinfo->type); + /* set object addr and fileno. These are for checking same object */ + info_p = (trav_info_t *) udata; + idx = info_p->nused - 1; + info_p->paths[idx].objno = oinfo->addr; + info_p->paths[idx].fileno = oinfo->fileno; + return(0); } /* end trav_info_visit_obj() */ @@ -656,6 +666,7 @@ trav_table_add(trav_table_t *table, new = table->nobjs++; table->objs[new].objno = oinfo ? oinfo->addr : HADDR_UNDEF; table->objs[new].flags[0] = table->objs[new].flags[1] = 0; + table->objs[new].is_same_trgobj = 0; table->objs[new].name = (char *)HDstrdup(path); table->objs[new].type = oinfo ? (h5trav_type_t)oinfo->type : H5TRAV_TYPE_LINK; table->objs[new].nlinks = 0; @@ -739,6 +750,7 @@ void trav_table_addflags(unsigned *flags, table->objs[new].objno = 0; table->objs[new].flags[0] = flags[0]; table->objs[new].flags[1] = flags[1]; + table->objs[new].is_same_trgobj = 0; table->objs[new].name = (char *)HDstrdup(name); table->objs[new].type = type; table->objs[new].nlinks = 0; diff --git a/tools/lib/h5trav.h b/tools/lib/h5trav.h index 8eb93fa..da8dc69 100644 --- a/tools/lib/h5trav.h +++ b/tools/lib/h5trav.h @@ -65,6 +65,8 @@ typedef struct symlink_trav_t { typedef struct trav_path_t { char *path; h5trav_type_t type; + haddr_t objno; /* object address */ + unsigned long fileno; /* File number that object is located in */ } trav_path_t; typedef struct trav_info_t { @@ -95,6 +97,7 @@ typedef struct trav_link_t { typedef struct trav_obj_t { haddr_t objno; /* object address */ unsigned flags[2]; /* h5diff.object is present or not in both files*/ + hbool_t is_same_trgobj; /* same target object? no need to compare */ char *name; /* name */ h5trav_type_t type; /* type of object */ trav_link_t *links; /* array of possible link names */ diff --git a/tools/lib/ph5diff.h b/tools/lib/ph5diff.h index eb15f12..9476a71 100644 --- a/tools/lib/ph5diff.h +++ b/tools/lib/ph5diff.h @@ -36,12 +36,12 @@ H5TOOLS_DLLVAR char outBuff[]; H5TOOLS_DLLVAR int outBuffOffset; H5TOOLS_DLLVAR FILE * overflow_file; -struct diff_args +struct diff_mpi_args { char name1[256]; char name2[256]; - h5trav_type_t type; diff_opt_t options; + diff_args_t argdata; /* rest args */ }; struct diffs_found -- cgit v0.12