summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlbert Cheng <acheng@hdfgroup.org>2005-02-07 02:11:31 (GMT)
committerAlbert Cheng <acheng@hdfgroup.org>2005-02-07 02:11:31 (GMT)
commit6a7a80135d284f547c0e3069404d71702ae8fdb9 (patch)
treef11bf1a4888b0b9542653720739fcc57a5104b75
parent8d817baf4b85760070c4b9a9b60825ee1ed4cfc6 (diff)
downloadhdf5-6a7a80135d284f547c0e3069404d71702ae8fdb9.zip
hdf5-6a7a80135d284f547c0e3069404d71702ae8fdb9.tar.gz
hdf5-6a7a80135d284f547c0e3069404d71702ae8fdb9.tar.bz2
[svn-r9949] Purpose:
Bug fix Description: h5diff sometimes will 'exit(status)' but if it is in parallel mode, it needs to close up the workers and the MPI environment before exit(..). Solution: Created h5diff_exit() that will just exit in serial mode but does all the shutdown properly if in parallel mode. Platforms tested: tested in heping, serial and parallel. Misc. update:
-rw-r--r--tools/h5diff/h5diff_main.c185
1 files changed, 122 insertions, 63 deletions
diff --git a/tools/h5diff/h5diff_main.c b/tools/h5diff/h5diff_main.c
index d064619..4a15f78 100644
--- a/tools/h5diff/h5diff_main.c
+++ b/tools/h5diff/h5diff_main.c
@@ -20,6 +20,10 @@
static void usage(void);
static int check_n_input( const char* );
static int check_f_input( const char* );
+void h5diff_exit(int status);
+#ifdef H5_HAVE_PARALLEL
+static void ph5diff_worker( void );
+#endif
/*-------------------------------------------------------------------------
* Function: main
@@ -88,7 +92,7 @@ int main(int argc, const char *argv[])
MPI_Comm_size(MPI_COMM_WORLD, &g_nTasks);
#else
printf("You cannot run ph5diff unless you compiles a parallel build of HDF5\n");
- exit(2);
+ h5diff_exit(2);
#endif
}
else
@@ -280,88 +284,114 @@ int main(int argc, const char *argv[])
return ret;
}
#ifdef H5_HAVE_PARALLEL
- /* All the other tasks just sit around and wait to be assigned something to diff */
+ /* All other tasks become workers and wait for assignments. */
else
- {
- struct diff_args args;
- hid_t file1_id, file2_id;
- char filenames[2][1024];
+ ph5diff_worker();
+#endif
+}
+
+#ifdef H5_HAVE_PARALLEL
+/*-------------------------------------------------------------------------
+ * Function: ph5diff_worker
+ *
+ * Purpose: worker process of ph5diff
+ *
+ * Return: none
+ *
+ * Programmer: Leon Arber
+ * Date: January 2005
+ *
+ * Comments:
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void
+ph5diff_worker(void)
+{
+ struct diff_args args;
+ hid_t file1_id, file2_id;
+ char filenames[2][1024];
+ hsize_t nfound=0;
+ int nID;
+ MPI_Status Status;
- outBuffOffset = 0;
+ MPI_Comm_rank(MPI_COMM_WORLD, &nID);
+ outBuffOffset = 0;
+
+ MPI_Recv(filenames, 1024*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
+ if(Status.MPI_TAG == MPI_TAG_PARALLEL)
+ {
+ /*printf("We're in parallel mode...opening the files\n");*/
- MPI_Recv(filenames, 1024*2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
- if(Status.MPI_TAG == MPI_TAG_PARALLEL)
+ /* disable error reporting */
+ H5E_BEGIN_TRY
{
-/* printf("We're in parallel mode...opening the files\n");*/
-
- /* disable error reporting */
- H5E_BEGIN_TRY
+ /* Open the files */
+ if ((file1_id = H5Fopen (filenames[0], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
{
- /* Open the files */
- if ((file1_id = H5Fopen (filenames[0], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
- {
- printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, fname1);
- MPI_Abort(MPI_COMM_WORLD, 0);
- }
- if ((file2_id = H5Fopen (filenames[1], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
- {
- printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, fname2);
- MPI_Abort(MPI_COMM_WORLD, 0);
- }
- /* enable error reporting */
+ printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[0]);
+ MPI_Abort(MPI_COMM_WORLD, 0);
+ }
+ if ((file2_id = H5Fopen (filenames[1], H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
+ {
+ printf ("h5diff Task [%d]: <%s>: unable to open file\n", nID, filenames[1]);
+ MPI_Abort(MPI_COMM_WORLD, 0);
}
- H5E_END_TRY;
+ /* enable error reporting */
+ }
+ H5E_END_TRY;
- while(1)
+ while(1)
+ {
+ MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
+
+ if(Status.MPI_TAG == MPI_TAG_ARGS)
{
- MPI_Probe(0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
+ /*Recv parameters for diff from manager task */
+ MPI_Recv(&args, sizeof(struct diff_args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status);
+ /*Do the diff */
+ nfound = diff(file1_id, args.name, file2_id, args.name, &(args.options), args.type);
- if(Status.MPI_TAG == MPI_TAG_ARGS)
+ /*If print buffer has something in it, request print token.*/
+ if(outBuffOffset>0)
{
- /*Recv parameters for diff from manager task */
- MPI_Recv(&args, sizeof(struct diff_args), MPI_BYTE, 0, MPI_TAG_ARGS, MPI_COMM_WORLD, &Status);
- /*Do the diff */
- nfound = diff(file1_id, args.name, file2_id, args.name, &(args.options), args.type);
-
- /*If print buffer has something in it, request print token.*/
- if(outBuffOffset>0)
- {
- MPI_Send(NULL, 0, MPI_BYTE, 0, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD);
+ MPI_Send(NULL, 0, MPI_BYTE, 0, MPI_TAG_TOK_REQUEST, MPI_COMM_WORLD);
- /*Wait for print token. */
- MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD, &Status);
+ /*Wait for print token. */
+ MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_PRINT_TOK, MPI_COMM_WORLD, &Status);
- /*When get token, print stuff out and return token */
- printf("%s", outBuff);
- memset(outBuff, 0, OUTBUFF_SIZE);
- outBuffOffset = 0;
+ /*When get token, print stuff out and return token */
+ printf("%s", outBuff);
+ memset(outBuff, 0, OUTBUFF_SIZE);
+ outBuffOffset = 0;
- MPI_Send(&nfound, 1, MPI_LONG_LONG, 0, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD);
- }
- else
- MPI_Send(&nfound, 1, MPI_LONG_LONG, 0, MPI_TAG_DONE, MPI_COMM_WORLD);
-
- }
- else if(Status.MPI_TAG == MPI_TAG_END)
- {
- MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_END, MPI_COMM_WORLD, &Status);
-/* printf("exiting..., task: %d\n", nID);*/
- break;
+ MPI_Send(&nfound, 1, MPI_LONG_LONG, 0, MPI_TAG_TOK_RETURN, MPI_COMM_WORLD);
}
else
- {
- printf("ERROR....invalid tag received\n");
- MPI_Abort(MPI_COMM_WORLD, 0);
- }
+ MPI_Send(&nfound, 1, MPI_LONG_LONG, 0, MPI_TAG_DONE, MPI_COMM_WORLD);
}
+ else if(Status.MPI_TAG == MPI_TAG_END)
+ {
+ MPI_Recv(NULL, 0, MPI_BYTE, 0, MPI_TAG_END, MPI_COMM_WORLD, &Status);
+ /*printf("exiting..., task: %d\n", nID);*/
+ break;
+ }
+ else
+ {
+ printf("ERROR....invalid tag received\n");
+ MPI_Abort(MPI_COMM_WORLD, 0);
+ }
+
}
- MPI_Barrier(MPI_COMM_WORLD);
- MPI_Finalize();
}
-#endif
+ MPI_Barrier(MPI_COMM_WORLD);
+ MPI_Finalize();
}
+#endif
/*-------------------------------------------------------------------------
* Function: check_n_input
@@ -494,8 +524,37 @@ void usage(void)
printf(" h5diff file1 file1 /g1/dset1 /g1/dset2\n");
printf("\n");
printf(" to compare '/g1/dset1' and '/g1/dset2' in the same file\n");
- exit(0);
+ h5diff_exit(0);
}
+/*-------------------------------------------------------------------------
+ * Function: h5diff_exit
+ *
+ * Purpose: dismiss phdiff worker processes and exit
+ *
+ * Return: none
+ *
+ * Programmer: Albert Cheng
+ * Date: Feb 6, 2005
+ *
+ * Comments:
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+void h5diff_exit(int status)
+{
+#ifdef H5_HAVE_PARALLEL
+ /* if in parallel mode, dismiss workers, close down MPI, then exit */
+ if(g_nTasks > 1){
+ phdiff_dismiss_workers();
+ MPI_Barrier(MPI_COMM_WORLD);
+ }
+ if(g_Parallel)
+ MPI_Finalize();
+#endif
+ exit(status);
+}