summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJonathan Kim <jkm@hdfgroup.org>2011-05-06 22:02:24 (GMT)
committerJonathan Kim <jkm@hdfgroup.org>2011-05-06 22:02:24 (GMT)
commit054ca47350a07361e61d668453f62a0c657da70d (patch)
tree38273e03cf23af80e6be9c4d0a95e65d7e326a9b
parent03cc051dde9c5673afe847a59dda84fc6dc6d0dc (diff)
downloadhdf5-054ca47350a07361e61d668453f62a0c657da70d.zip
hdf5-054ca47350a07361e61d668453f62a0c657da70d.tar.gz
hdf5-054ca47350a07361e61d668453f62a0c657da70d.tar.bz2
[svn-r20767] Purpose:
HDFFV-5928 - GMQS: h5diff problem and improvement on comparsing the same objects Description: Improved performance by eliminating duplicated action for getting object information in half from the previous fixe when comparing group vs group. This is addition to the previous commit r20676. Tested: jam (linux32-LE), koala (linux64-LE), heiwa (linuxppc64-BE), tejeda (mac32-LE), linew (solaris-BE), cmake
-rw-r--r--release_docs/RELEASE.txt14
-rw-r--r--tools/h5diff/ph5diff_main.c4
-rw-r--r--tools/lib/h5diff.c114
-rw-r--r--tools/lib/h5diff.h12
-rw-r--r--tools/lib/h5trav.c12
-rw-r--r--tools/lib/h5trav.h3
-rw-r--r--tools/lib/ph5diff.h4
7 files changed, 116 insertions, 47 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index d14463b..fdfe110 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -525,13 +525,19 @@ Bug Fixes since HDF5-1.8.0 release
Tools
-----
+ - Fixed h5diff to compare file itself correctly. Previously h5diff
+ reported either different or not compatible in certain cases even
+ comparing file itself. This fix also improve performance when
+ comparing same target objects through verifying the obj&file
+ addresses before comparing the details in the objects (ex: datasets
+ or attributes) Bug #HDFFV-5928 (XCAO & JKM 05/06/2011)
- Updated h5dump test case script to prevent entire test failure upon
source directory is read-only. Bug# HDFFV-4342 (JKM 2011/4/12)
- Fixed h5dump displaying incorrect values for H5T_STD_I8BE type data in
- attribute on Big-Endian machine. H5T_STD_I8BE is unsigned 8bit type,
- so h5dump is supposed to display -2 instead of 254. It worked correctly
- on Little-Endian system , but not on Big-Endian system. Bug #HDFFV-4358
- (JKM 04/08/2011)
+ attribute on Big-Endian machine. H5T_STD_I8BE is unsigned 8bit type,
+ so h5dump is supposed to display -2 instead of 254. It worked
+ correctly on Little-Endian system , but not on Big-Endian system.
+ Bug #HDFFV-4358 (JKM 04/08/2011)
- Updated to unify option name to '--enable-error-stack' for printing
HDF5 error stack messages for HDF5 tools. h5ls and h5dump for now.
For h5ls, this replaces "-e/--errors" option, which is deprecated.
diff --git a/tools/h5diff/ph5diff_main.c b/tools/h5diff/ph5diff_main.c
index bac1086..871b93b 100644
--- a/tools/h5diff/ph5diff_main.c
+++ b/tools/h5diff/ph5diff_main.c
@@ -136,7 +136,7 @@ int main(int argc, const char *argv[])
static void
ph5diff_worker(int nID)
{
- struct diff_args args;
+ struct diff_mpi_args args;
hid_t file1_id, file2_id;
char filenames[2][MAX_FILENAME];
char out_data[PRINT_DATA_MAX_SIZE] = {0};
@@ -177,7 +177,7 @@ ph5diff_worker(int nID)
/*Recv parameters for diff from manager task */
MPI_Recv(&args, sizeof(args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status);
/*Do the diff */
- diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), args.type);
+ diffs.nfound = diff(file1_id, args.name1, file2_id, args.name2, &(args.options), &(args.argdata));
diffs.not_cmp = args.options.not_cmp;
/*If print buffer has something in it, request print token.*/
diff --git a/tools/lib/h5diff.c b/tools/lib/h5diff.c
index 4f82906..bbf9b39 100644
--- a/tools/lib/h5diff.c
+++ b/tools/lib/h5diff.c
@@ -366,6 +366,7 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch
int path2_offset = 0;
int cmp;
trav_table_t *table;
+ size_t idx;
/* init */
trav_table_init( &table );
@@ -403,6 +404,14 @@ static void build_match_list (const char *objname1, trav_info_t *info1, const ch
infile[0] = 1;
infile[1] = 1;
trav_table_addflags(infile, path1_lp, info1->paths[curr1].type, table);
+ /* if the two point to the same target object,
+ * mark that in table */
+ if (info1->paths[curr1].fileno == info2->paths[curr2].fileno &&
+ info1->paths[curr1].objno == info2->paths[curr2].objno )
+ {
+ idx = table->nobjs - 1;
+ table->objs[idx].is_same_trgobj = 1;
+ }
}
curr1++;
curr2++;
@@ -673,6 +682,8 @@ hsize_t h5diff(const char *fname1,
if(!is_valid_options(options))
goto out;
+ options->cmn_objs = 1; /* eliminate warning */
+
/*-------------------------------------------------------------------------
* open the files first; if they are not valid, no point in continuing
*-------------------------------------------------------------------------
@@ -850,7 +861,17 @@ hsize_t h5diff(const char *fname1,
HDstrcat(obj2fullname, "/");
}
- options->cmn_objs = 1; /* eliminate warning */
+ /*
+ * If verbose options is used, need to traverse thorugh the list of objects
+ * in the group to print out objects information.
+ * Use h5tools_is_obj_same() to improve performance by skipping
+ * comparing details of same objects.
+ */
+ if(!(options->m_verbose || options->m_report))
+ {
+ if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0)
+ goto out;
+ }
/*---------------------------------------------
* check for following symlinks
@@ -934,18 +955,6 @@ hsize_t h5diff(const char *fname1,
{
/*
- * Use h5tools_is_obj_same() to improve performance by skipping
- * comparing details of same objects. If verbose options is used,
- * need to traverse thorugh the list of objects in the group and
- * print out object information. Details of same objects will be
- * skipped at diff() function.
- */
- if(!(options->m_verbose || options->m_report)) {
- if (h5tools_is_obj_same(file1_id,obj1fullname,file2_id,obj2fullname)!=0)
- goto out;
- }
-
- /*
* traverse group1
*/
trav_info_init(fname1, file1_id, &info1_grp);
@@ -1092,6 +1101,8 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1,
char * grp2_path = "";
char * obj1_fullpath = NULL;
char * obj2_fullpath = NULL;
+ h5trav_type_t objtype;
+ diff_args_t argdata;
/*
@@ -1153,7 +1164,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1,
int n;
int busyTasks = 0;
struct diffs_found nFoundbyWorker;
- struct diff_args args;
+ struct diff_mpi_args args;
int havePrintToken = 1;
MPI_Status Status;
@@ -1165,6 +1176,7 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1,
{
if( table->objs[i].flags[0] && table->objs[i].flags[1])
{
+ objtype = table->objs[i].type;
/* make full path for obj1 */
obj1_fullpath = (char*)HDcalloc (strlen(grp1_path) + strlen (table->objs[i].name) + 1, sizeof (char));
HDstrcpy(obj1_fullpath, grp1_path);
@@ -1175,12 +1187,16 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1,
HDstrcpy(obj2_fullpath, grp2_path);
HDstrcat(obj2_fullpath, table->objs[i].name);
+ /* Set argdata to pass other args into diff() */
+ argdata.type = objtype;
+ argdata.is_same_trgobj = table->objs[i].is_same_trgobj;
+
options->cmn_objs = 1;
if(!g_Parallel)
{
nfound += diff(file1_id, obj1_fullpath,
file2_id, obj2_fullpath,
- options, table->objs[i].type);
+ options, &argdata);
} /* end if */
#ifdef H5_HAVE_PARALLEL
else
@@ -1204,10 +1220,12 @@ hsize_t diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1,
MPI_Abort(MPI_COMM_WORLD, 0);
} /* end if */
+ /* set args struct to pass */
HDstrcpy(args.name1, obj1_fullpath);
HDstrcpy(args.name2, obj2_fullpath);
args.options = *options;
- args.type = table->objs[i].type;
+ args.argdata.type = objtype;
+ args.argdata.is_same_trgobj = table->objs[i].is_same_trgobj;
h5diffdebug2("busyTasks=%d\n", busyTasks);
/* if there are any outstanding print requests, let's handle one. */
@@ -1500,6 +1518,7 @@ hsize_t diff_compare(hid_t file1_id,
int is_dangle_link2 = 0;
const char *obj1name = obj1_name;
const char *obj2name = obj2_name;
+ diff_args_t argdata;
/* local variables for diff() */
h5trav_type_t obj1type, obj2type;
@@ -1707,9 +1726,13 @@ hsize_t diff_compare(hid_t file1_id,
goto out;
}
+ /* Set argdata to pass other args into diff() */
+ argdata.type = obj1type;
+ argdata.is_same_trgobj = 0;
+
nfound = diff(file1_id, obj1name,
file2_id, obj2name,
- options, obj1type);
+ options, &argdata);
out:
/*-------------------------------
@@ -1764,11 +1787,16 @@ out:
*
* Return: Number of differences found
*
- * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu
- * Date: May 9, 2003
- *
* Programmer: Jonathan Kim
* - add following links feature (Feb 11,2010)
+ * - Change to use diff_args_t to pass the rest of args.
+ * Passing through it instead of individual args provides smoother
+ * extensibility through its members along with MPI code update for ph5diff
+ * as it doesn't require interface change.
+ * (May 6,2011)
+ *
+ * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu
+ * Date: May 9, 2003
*-------------------------------------------------------------------------
*/
@@ -1777,7 +1805,7 @@ hsize_t diff(hid_t file1_id,
hid_t file2_id,
const char *path2,
diff_opt_t * options,
- h5trav_type_t type)
+ diff_args_t *argdata)
{
hid_t type1_id = (-1);
hid_t type2_id = (-1);
@@ -1850,24 +1878,24 @@ hsize_t diff(hid_t file1_id,
goto out2;
/*
- * Use h5tools_is_obj_same() to improve performance by skipping
- * comparing details of same objects,
- *
- * check if two paths point to the same object for hard links or
- * the option of follow link is used. h5tools_is_obj_same() is not
- * needed for other cases.
+ * If both points to the same target object, skip comparing details inside
+ * of the objects to improve performance.
+ * Always check for the hard links, otherwise if follow symlink option is
+ * specified.
+ *
+ * Perform this to match the outputs as bypassing.
*/
- is_hard_link = (type==H5TRAV_TYPE_DATASET ||
- type==H5TRAV_TYPE_NAMED_DATATYPE ||
- type==H5TRAV_TYPE_GROUP);
+ is_hard_link = (argdata->type == H5TRAV_TYPE_DATASET ||
+ argdata->type == H5TRAV_TYPE_NAMED_DATATYPE ||
+ argdata->type == H5TRAV_TYPE_GROUP);
if (options->follow_links || is_hard_link)
{
- if (h5tools_is_obj_same(file1_id, path1, file2_id, path2)!=0)
+ if (argdata->is_same_trgobj)
{
/* print information is only verbose option is used */
if(options->m_verbose || options->m_report)
{
- switch(type)
+ switch(argdata->type)
{
case H5TRAV_TYPE_DATASET:
do_print_objname("dataset", path1, path2, options);
@@ -1889,7 +1917,7 @@ hsize_t diff(hid_t file1_id,
break;
default:
parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n",
- path1, path2, get_type(type) );
+ path1, path2, get_type(argdata->type) );
options->not_cmp = 1;
break;
} /* switch(type)*/
@@ -1898,10 +1926,10 @@ hsize_t diff(hid_t file1_id,
} /* if(options->m_verbose || options->m_report) */
goto out2;
- } /* h5tools_is_obj_same */
+ }
}
- switch(type)
+ switch(argdata->type)
{
/*----------------------------------------------------------------------
* H5TRAV_TYPE_DATASET
@@ -2031,10 +2059,15 @@ hsize_t diff(hid_t file1_id,
goto out;
}
+ /* Renew type in argdata to pass into diff().
+ * For recursive call, argdata.is_same_trgobj is already
+ * set from initial call, so don't reset here */
+ argdata->type = linkinfo1.trg_type;
+
/* call self to compare target object */
nfound += diff(file1_id, path1,
file2_id, path2,
- options, linkinfo1.trg_type);
+ options, argdata);
}
/* always print the number of differences found in verbose mode */
@@ -2083,9 +2116,14 @@ hsize_t diff(hid_t file1_id,
goto out;
}
+ /* Renew type in argdata to pass into diff().
+ * For recursive call, argdata.is_same_trgobj is already
+ * set from initial call, so don't reset here */
+ argdata->type = linkinfo1.trg_type;
+
nfound = diff(file1_id, path1,
file2_id, path2,
- options, linkinfo1.trg_type);
+ options, argdata);
}
} /* end if */
else
@@ -2116,7 +2154,7 @@ hsize_t diff(hid_t file1_id,
default:
if(options->m_verbose)
parallel_print("Comparison not supported: <%s> and <%s> are of type %s\n",
- path1, path2, get_type(type) );
+ path1, path2, get_type(argdata->type) );
options->not_cmp = 1;
break;
}
diff --git a/tools/lib/h5diff.h b/tools/lib/h5diff.h
index 19d5ed5..66c5188 100644
--- a/tools/lib/h5diff.h
+++ b/tools/lib/h5diff.h
@@ -22,6 +22,16 @@
#define MAX_FILENAME 1024
/*-------------------------------------------------------------------------
+ * This is used to pass multiple args into diff().
+ * Passing this instead of several each arg provides smoother extensibility
+ * through its members along with MPI code for ph5diff
+ * as it doesn't require interface change.
+ *------------------------------------------------------------------------*/
+typedef struct {
+ h5trav_type_t type;
+ hbool_t is_same_trgobj;
+} diff_args_t;
+/*-------------------------------------------------------------------------
* command line options
*-------------------------------------------------------------------------
*/
@@ -106,7 +116,7 @@ hsize_t diff( hid_t file1_id,
hid_t file2_id,
const char *path2,
diff_opt_t *options,
- h5trav_type_t type );
+ diff_args_t *argdata);
hsize_t diff_compare( hid_t file1_id,
const char *file1_name,
diff --git a/tools/lib/h5trav.c b/tools/lib/h5trav.c
index 5195261..c3cfddf 100644
--- a/tools/lib/h5trav.c
+++ b/tools/lib/h5trav.c
@@ -307,6 +307,8 @@ trav_info_add(trav_info_t *info, const char *path, h5trav_type_t obj_type)
idx = info->nused++;
info->paths[idx].path = HDstrdup(path);
info->paths[idx].type = obj_type;
+ info->paths[idx].fileno = 0;
+ info->paths[idx].objno = HADDR_UNDEF;
} /* end trav_info_add() */
@@ -327,10 +329,18 @@ int
trav_info_visit_obj(const char *path, const H5O_info_t *oinfo,
const char UNUSED *already_visited, void *udata)
{
+ size_t idx;
+ trav_info_t *info_p;
/* Add the object to the 'info' struct */
/* (object types map directly to "traversal" types) */
trav_info_add((trav_info_t *)udata, path, (h5trav_type_t)oinfo->type);
+ /* set object addr and fileno. These are for checking same object */
+ info_p = (trav_info_t *) udata;
+ idx = info_p->nused - 1;
+ info_p->paths[idx].objno = oinfo->addr;
+ info_p->paths[idx].fileno = oinfo->fileno;
+
return(0);
} /* end trav_info_visit_obj() */
@@ -656,6 +666,7 @@ trav_table_add(trav_table_t *table,
new = table->nobjs++;
table->objs[new].objno = oinfo ? oinfo->addr : HADDR_UNDEF;
table->objs[new].flags[0] = table->objs[new].flags[1] = 0;
+ table->objs[new].is_same_trgobj = 0;
table->objs[new].name = (char *)HDstrdup(path);
table->objs[new].type = oinfo ? (h5trav_type_t)oinfo->type : H5TRAV_TYPE_LINK;
table->objs[new].nlinks = 0;
@@ -739,6 +750,7 @@ void trav_table_addflags(unsigned *flags,
table->objs[new].objno = 0;
table->objs[new].flags[0] = flags[0];
table->objs[new].flags[1] = flags[1];
+ table->objs[new].is_same_trgobj = 0;
table->objs[new].name = (char *)HDstrdup(name);
table->objs[new].type = type;
table->objs[new].nlinks = 0;
diff --git a/tools/lib/h5trav.h b/tools/lib/h5trav.h
index 8eb93fa..da8dc69 100644
--- a/tools/lib/h5trav.h
+++ b/tools/lib/h5trav.h
@@ -65,6 +65,8 @@ typedef struct symlink_trav_t {
typedef struct trav_path_t {
char *path;
h5trav_type_t type;
+ haddr_t objno; /* object address */
+ unsigned long fileno; /* File number that object is located in */
} trav_path_t;
typedef struct trav_info_t {
@@ -95,6 +97,7 @@ typedef struct trav_link_t {
typedef struct trav_obj_t {
haddr_t objno; /* object address */
unsigned flags[2]; /* h5diff.object is present or not in both files*/
+ hbool_t is_same_trgobj; /* same target object? no need to compare */
char *name; /* name */
h5trav_type_t type; /* type of object */
trav_link_t *links; /* array of possible link names */
diff --git a/tools/lib/ph5diff.h b/tools/lib/ph5diff.h
index eb15f12..9476a71 100644
--- a/tools/lib/ph5diff.h
+++ b/tools/lib/ph5diff.h
@@ -36,12 +36,12 @@ H5TOOLS_DLLVAR char outBuff[];
H5TOOLS_DLLVAR int outBuffOffset;
H5TOOLS_DLLVAR FILE * overflow_file;
-struct diff_args
+struct diff_mpi_args
{
char name1[256];
char name2[256];
- h5trav_type_t type;
diff_opt_t options;
+ diff_args_t argdata; /* rest args */
};
struct diffs_found