From 8af13859397ca1030dda5668017664692669d7e6 Mon Sep 17 00:00:00 2001 From: Leon Arber Date: Wed, 2 Feb 2005 18:01:42 -0500 Subject: [svn-r9925] Purpose: Bug Fix Description: Permanent fix for the incompatibilities between h5diff and h5repack. Solution: h5diff now contains the code to run both parallel and serial diffs. Depending on how the binary is called, it will run either the serial or the parallel versions respectively. Platforms tested: heping(serial + parallel), copper. Misc. update: --- tools/h5diff/h5diff_main.c | 63 +++-- tools/lib/h5diff.c | 660 ++++++++++++++------------------------------- tools/lib/h5diff.h | 16 -- tools/lib/h5diff_util.c | 6 +- tools/lib/ph5diff.h | 8 +- 5 files changed, 241 insertions(+), 512 deletions(-) diff --git a/tools/h5diff/h5diff_main.c b/tools/h5diff/h5diff_main.c index d25904f..1fc4569 100644 --- a/tools/h5diff/h5diff_main.c +++ b/tools/h5diff/h5diff_main.c @@ -24,7 +24,7 @@ static int check_f_input( const char* ); /*------------------------------------------------------------------------- * Function: main * - * Purpose: ph5diff main program + * Purpose: h5diff/ph5diff main program * * Return: An exit status of 0 means no differences were found, 1 means some * differences were found. @@ -44,6 +44,10 @@ static int check_f_input( const char* ); * * November 2004: Leon Arber (larber@uiuc.edu) * Additions that allow h5diff to be run in parallel + * + * This function drives the diff process and will do a serial or parallel diff depending + * on the value of the global variable g_Parallel (default is 0), set to 1 when the program + * is run as "ph5diff" *------------------------------------------------------------------------- */ @@ -56,27 +60,42 @@ int main(int argc, const char *argv[]) const char *objname1 = NULL; const char *objname2 = NULL; hsize_t nfound=0; + int nID = 0; int ret; diff_opt_t options; -#ifdef H5_HAVE_PH5DIFF - int nID; +#ifdef H5_HAVE_PARALLEL MPI_Status Status; +#endif + /* See what we were called as to determine whether to run serial or parallel version + * + * It has been determined that: + * If argv[0] is greater than 6 characters AND the last 7 equal "ph5diff" we run parallel + * In all other cases, we run serial */ - /*------------------------------------------------------------------------- - * Initialize the MPI environment - *------------------------------------------------------------------------- - */ - MPI_Init(&argc, (char***) &argv); + if( (strlen(argv[0]) > strlen("h5diff")) && (strcmp(argv[0] + (strlen(argv[0]) - strlen("ph5diff")), "ph5diff") == 0) ) + g_Parallel = 1; - MPI_Comm_rank(MPI_COMM_WORLD, &nID); - MPI_Comm_size(MPI_COMM_WORLD, &g_nTasks); + + if(g_Parallel) + { +#ifdef H5_HAVE_PARALLEL + MPI_Init(&argc, (char***) &argv); + + MPI_Comm_rank(MPI_COMM_WORLD, &nID); + MPI_Comm_size(MPI_COMM_WORLD, &g_nTasks); +#else + printf("You cannot run ph5diff unless you compiles a parallel build of HDF5\n"); + exit(2); +#endif + } + else + g_nTasks = 1; /* Have the manager process the command-line */ if(nID == 0) { -#endif memset(&options, 0, sizeof (diff_opt_t)); /*------------------------------------------------------------------------- @@ -217,12 +236,9 @@ int main(int argc, const char *argv[]) }/*for*/ - if(g_nTasks < 2) - nfound = h5diff(fname1,fname2,objname1,objname2,&options); - else - nfound = h5diff_parallel(fname1,fname2,objname1,objname2,&options); + nfound = h5diff(fname1,fname2,objname1,objname2,&options); -#ifdef H5_HAVE_PH5DIFF +#ifdef H5_HAVE_PARALLEL if(g_nTasks > 1) MPI_Barrier(MPI_COMM_WORLD); #endif @@ -252,26 +268,27 @@ int main(int argc, const char *argv[]) *------------------------------------------------------------------------- */ -#ifdef H5_HAVE_PH5DIFF - MPI_Finalize(); +#ifdef H5_HAVE_PARALLEL + if(g_Parallel) + MPI_Finalize(); #endif - + ret= (nfound==0 ? 0 : 1 ); if (options.err_stat) ret=-1; return ret; -#ifdef H5_HAVE_PH5DIFF } +#ifdef H5_HAVE_PARALLEL /* All the other tasks just sit around and wait to be assigned something to diff */ else { struct diff_args args; hid_t file1_id, file2_id; - char filenames[2][255]; + char filenames[2][1024]; outBuffOffset = 0; - - MPI_Recv(filenames, 255*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status); + + MPI_Recv(filenames, 1024*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status); if(Status.MPI_TAG == MPI_TAG_PARALLEL) { printf("We're in parallel mode...opening the files\n"); diff --git a/tools/lib/h5diff.c b/tools/lib/h5diff.c index 0b8bb7e..39be45b 100644 --- a/tools/lib/h5diff.c +++ b/tools/lib/h5diff.c @@ -46,6 +46,9 @@ print_objname (diff_opt_t * options, hsize_t nfound) * * Date: October 22, 2003 * + * Modifications: Jan 2005 Leon Arber, larber@uiuc.edu + * Added support for parallel diffing + * *------------------------------------------------------------------------- */ @@ -58,10 +61,10 @@ h5diff (const char *fname1, trav_info_t *info1 = NULL; trav_info_t *info2 = NULL; hid_t file1_id=(-1), file2_id=(-1); - char filenames[2][255]; + char filenames[2][1024]; hsize_t nfound = 0; - memset(filenames, 0, 255*2); + memset(filenames, 0, 1024*2); if (options->m_quiet && (options->m_verbose || options->m_report)) @@ -86,317 +89,13 @@ h5diff (const char *fname1, printf ("h5diff: <%s>: unable to open file\n", fname1); options->err_stat = 1; - goto out; - } - if ((file2_id = H5Fopen (fname2, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) - { - printf ("h5diff: <%s>: unable to open file\n", fname2); - options->err_stat = 1; - - goto out; - } - /* enable error reporting */ - } - H5E_END_TRY; - - -/*------------------------------------------------------------------------- - * get the number of objects in the files - *------------------------------------------------------------------------- - */ - nobjects1 = h5trav_getinfo (file1_id, NULL, 0); - nobjects2 = h5trav_getinfo (file2_id, NULL, 0); - - if (nobjects1 < 0 || nobjects2 < 0) - { - printf ("Error: Could not get get file contents\n"); - options->err_stat = 1; - goto out; - } - - assert (nobjects1 > 0); - assert (nobjects2 > 0); - -/*------------------------------------------------------------------------- - * get the list of objects in the files - *------------------------------------------------------------------------- - */ - - info1 = (trav_info_t *) malloc (nobjects1 * sizeof (trav_info_t)); - info2 = (trav_info_t *) malloc (nobjects2 * sizeof (trav_info_t)); - if (info1 == NULL || info2 == NULL) - { - printf ("Error: Not enough memory for object list\n"); - options->err_stat = 1; - if (info1) - h5trav_freeinfo (info1, nobjects1); - if (info2) - h5trav_freeinfo (info2, nobjects1); - goto out; - } - - h5trav_getinfo (file1_id, info1, 0); - h5trav_getinfo (file2_id, info2, 0); - -/*------------------------------------------------------------------------- - * object name was supplied - *------------------------------------------------------------------------- - */ - - if (objname1) - { - - assert (objname2); - options->cmn_objs = 1; /* eliminate warning */ - nfound = diff_compare (file1_id, fname1, objname1, nobjects1, info1, - file2_id, fname2, objname2, nobjects2, info2, - options); - } - -/*------------------------------------------------------------------------- - * compare all - *------------------------------------------------------------------------- - */ - - else - { - - nfound = diff_match (file1_id, nobjects1, info1, - file2_id, nobjects2, info2, options); - } - - - h5trav_freeinfo (info1, nobjects1); - h5trav_freeinfo (info2, nobjects2); - -out: - /* close */ - H5E_BEGIN_TRY - { - H5Fclose (file1_id); - H5Fclose (file2_id); - } - H5E_END_TRY; - - return nfound; -} - - - -/*------------------------------------------------------------------------- - * Function: diff_match - * - * Purpose: Find common objects; the algorithm used for this search is the - * cosequential match algorithm and is described in - * Folk, Michael; Zoellick, Bill. (1992). File Structures. Addison-Wesley. - * - * Return: Number of differences found - * - * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu - * - * Date: May 9, 2003 - * - *------------------------------------------------------------------------- - */ -hsize_t -diff_match (hid_t file1_id, - int nobjects1, - trav_info_t * info1, - hid_t file2_id, - int nobjects2, trav_info_t * info2, diff_opt_t * options) -{ - int more_names_exist = (nobjects1 > 0 && nobjects2 > 0) ? 1 : 0; - trav_table_t *table = NULL; - int cmp; - int curr1 = 0; - int curr2 = 0; - unsigned infile[2]; - char c1, c2; - hsize_t nfound = 0; - int i; - - /*------------------------------------------------------------------------- - * build the list - *------------------------------------------------------------------------- - */ - trav_table_init (&table); - - while (more_names_exist) - { - /* criteria is string compare */ - cmp = strcmp (info1[curr1].name, info2[curr2].name); - if (cmp == 0) +#ifdef H5_HAVE_PARALLEL + if(g_Parallel) { - infile[0] = 1; - infile[1] = 1; - trav_table_addflags (infile, info1[curr1].name, info1[curr1].type, - table); - - curr1++; - curr2++; + /* Let tasks know that they won't be needed */ + for(i=1; im_verbose) - { - printf ("\n"); - printf ("file1 file2\n"); - printf ("---------------------------------------\n"); - for (i = 0; i < table->nobjs; i++) - { - c1 = (table->objs[i].flags[0]) ? 'x' : ' '; - c2 = (table->objs[i].flags[1]) ? 'x' : ' '; - printf ("%5c %6c %-15s\n", c1, c2, table->objs[i].name); - } - printf ("\n"); - } - - - /*------------------------------------------------------------------------- - * do the diff for common objects - *------------------------------------------------------------------------- - */ - - for (i = 0; i < table->nobjs; i++) - { - if (table->objs[i].flags[0] && table->objs[i].flags[1]) - { - int workerFound = 0; - options->cmn_objs = 1; - nfound += diff (file1_id, - table->objs[i].name, - file2_id, - table->objs[i].name, options, table->objs[i].type); - } - - } - /* free table */ - trav_table_free (table); - - - /*------------------------------------------------------------------------- - * do the diff for the root. - * this is a special case, we get an ID for the root group and call diff() - * with this ID; it compares only the root group attributes - *------------------------------------------------------------------------- - */ - - /* the manager can do this. */ - nfound += diff (file1_id, "/", file2_id, "/", options, H5G_GROUP); - - return nfound; -} - - - - -/*------------------------------------------------------------------------- - * Function: h5diff_parallel - * - * Purpose: public function, can be called in an application program. - * return differences between 2 HDF5 files - * - * Return: Number of differences found. - * - * Programmer: Pedro Vicente, pvn@ncsa.uiuc.edu - * - * Date: October 22, 2003 - * - *------------------------------------------------------------------------- - */ - -hsize_t -h5diff_parallel (const char *fname1, - const char *fname2, - const char *objname1, const char *objname2, diff_opt_t * options) -{ - int nobjects1, nobjects2, i; - trav_info_t *info1 = NULL; - trav_info_t *info2 = NULL; - hid_t file1_id=(-1), file2_id=(-1); - char filenames[2][255]; - hsize_t nfound = 0; - - memset(filenames, 0, 255*2); - - - if (options->m_quiet && (options->m_verbose || options->m_report)) - { - printf - ("Error: -q (quiet mode) cannot be added to verbose or report modes\n"); - options->err_stat = 1; - return 0; - } - -/*------------------------------------------------------------------------- - * open the files first; if they are not valid, no point in continuing - *------------------------------------------------------------------------- - */ - - /* disable error reporting */ - H5E_BEGIN_TRY - { - /* Open the files */ - if ((file1_id = H5Fopen (fname1, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) - { - printf ("h5diff: <%s>: unable to open file\n", fname1); - options->err_stat = 1; - -#ifdef H5_HAVE_PH5DIFF - /* Let tasks know that they won't be needed */ - for(i=1; i: unable to open file\n", fname2); options->err_stat = 1; -#ifdef H5_HAVE_PH5DIFF - /* Let tasks know that they won't be needed */ - for(i=1; icmn_objs = 1; /* eliminate warning */ @@ -485,16 +190,25 @@ h5diff_parallel (const char *fname1, else { -#ifdef H5_HAVE_PH5DIFF - strncpy(filenames[0], fname1, 255); - strncpy(filenames[1], fname2, 255); - - /* Alert the worker tasks that there's going to be work. */ +#ifdef H5_HAVE_PARALLEL + if(g_Parallel) + { + if( (strlen(fname1) > 1024) || (strlen(fname2) > 1024)) + { + printf("The parallel diff only supports path names up to 1024 characters\n"); + MPI_Abort(MPI_COMM_WORLD, 0); + } + + strcpy(filenames[0], fname1); + strcpy(filenames[1], fname2); - for(i=1; icmn_objs = 1; -#ifndef H5_HAVE_PH5DIFF - nfound += diff (file1_id, - table->objs[i].name, - file2_id, - table->objs[i].name, options, table->objs[i].type); -#else - /* We're in parallel mode */ - - /*Set up args to pass to worker task. */ - strncpy(args.name, table->objs[i].name, 255); - args.options = *options; - args.type= table->objs[i].type; - - /* if there are any outstanding print requests, let's handle one. */ - if(busyTasks > 0) + if(!g_Parallel) + { + nfound += diff (file1_id, + table->objs[i].name, + file2_id, + table->objs[i].name, options, table->objs[i].type); + } +#ifdef H5_HAVE_PARALLEL + else { - int incomingMessage; - /* check if any tasks freed up, and didn't need to print. */ - MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &incomingMessage, &Status); + /* We're in parallel mode */ - if(incomingMessage) + /*Set up args to pass to worker task. */ + if(strlen(table->objs[i].name) > 255) { - workerTasks[Status.MPI_SOURCE-1] = 1; - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &Status); - nfound += nFoundbyWorker; - busyTasks--; + printf("The parallel diff only supports object names up to 255 characters\n"); + MPI_Abort(MPI_COMM_WORLD, 0); } + + strcpy(args.name, table->objs[i].name); + args.options = *options; + args.type= table->objs[i].type; - /* check to see if the print token was returned. */ - if(!havePrintToken) + /* if there are any outstanding print requests, let's handle one. */ + if(busyTasks > 0) { + int incomingMessage; + /* check if any tasks freed up, and didn't need to print. */ + MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &incomingMessage, &Status); - /* check incoming queue for token */ - MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &incomingMessage, &Status); - - /* incoming token implies free task. */ if(incomingMessage) { workerTasks[Status.MPI_SOURCE-1] = 1; - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &Status); nfound += nFoundbyWorker; busyTasks--; - havePrintToken = 1; } - } - /* check to see if anyone needs the print token. */ - if(havePrintToken) - { - /* check incoming queue for print token requests */ - MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &incomingMessage, &Status); - if(incomingMessage) + /* check to see if the print token was returned. */ + if(!havePrintToken) { - MPI_Recv(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &Status); - MPI_Send(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); - havePrintToken = 0; + + /* check incoming queue for token */ + MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &incomingMessage, &Status); + + /* incoming token implies free task. */ + if(incomingMessage) + { + workerTasks[Status.MPI_SOURCE-1] = 1; + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + nfound += nFoundbyWorker; + busyTasks--; + havePrintToken = 1; + } + } + + /* check to see if anyone needs the print token. */ + if(havePrintToken) + { + /* check incoming queue for print token requests */ + MPI_Iprobe(MPI_ANY_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &incomingMessage, &Status); + if(incomingMessage) + { + MPI_Recv(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &Status); + MPI_Send(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); + havePrintToken = 0; + } } } - } - /* check array of tasks to see which ones are free. - * Manager task never does work, so freeTasks[0] is really - * worker task 0. */ + /* check array of tasks to see which ones are free. + * Manager task never does work, so freeTasks[0] is really + * worker task 0. */ - for(n=1; (n 0) /* make sure all tasks are done */ +#ifdef H5_HAVE_PARALLEL + if(g_Parallel) { - MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &Status); - if(Status.MPI_TAG == MPI_TAG_DONE) - { - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &Status); - nfound += nFoundbyWorker; - busyTasks--; - } - else if(Status.MPI_TAG == MPI_TAG_TOK_REQUEST) + while(busyTasks > 0) /* make sure all tasks are done */ { - MPI_Recv(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &Status); - if(havePrintToken) + MPI_Probe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &Status); + if(Status.MPI_TAG == MPI_TAG_DONE) { - MPI_Send(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_DONE, MPI_COMM_WORLD, &Status); nfound += nFoundbyWorker; busyTasks--; } - else /* someone else must have it...wait for them to return it, then give it to the task that just asked for it. */ + else if(Status.MPI_TAG == MPI_TAG_TOK_REQUEST) + { + MPI_Recv(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD, &Status); + if(havePrintToken) + { + MPI_Send(NULL, 0, MPI_BYTE, Status.MPI_SOURCE, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + nfound += nFoundbyWorker; + busyTasks--; + } + else /* someone else must have it...wait for them to return it, then give it to the task that just asked for it. */ + { + int source = Status.MPI_SOURCE; + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, MPI_ANY_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + nfound += nFoundbyWorker; + busyTasks--; + MPI_Send(NULL, 0, MPI_BYTE, source, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); + } + } + else if(Status.MPI_TAG == MPI_TAG_TOK_RETURN) { - int source = Status.MPI_SOURCE; - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, MPI_ANY_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); + MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); nfound += nFoundbyWorker; busyTasks--; - MPI_Send(NULL, 0, MPI_BYTE, source, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD); + havePrintToken = 1; + } + else + { + printf("ERROR!! Invalid tag (%d) received \n", Status.MPI_TAG); + MPI_Abort(MPI_COMM_WORLD, 0); } } - else if(Status.MPI_TAG == MPI_TAG_TOK_RETURN) - { - MPI_Recv(&nFoundbyWorker, 1, MPI_LONG_LONG, Status.MPI_SOURCE, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD, &Status); - nfound += nFoundbyWorker; - busyTasks--; - havePrintToken = 1; - } - else - { - printf("ERROR!! Invalid tag (%d) received \n", Status.MPI_TAG); - MPI_Abort(MPI_COMM_WORLD, 0); - } + + for(i=1; i #endif -- cgit v0.12