summary | refs | log | tree | commit | diff | stats
path: root/src/H5FDmpio.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- src/H5FDmpio.c 232
1 files changed, 230 insertions, 2 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index d5aa170..50b5676 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -59,6 +59,24 @@ static char H5FD_mpi_native_g[] = "native";
*/
typedef struct H5FD_mpio_t {
H5FD_t pub; /*public stuff, must be first */
+
+/* For comparisons */
+#ifndef H5_HAVE_WIN32_API
+ /* On most systems the combination of device and i-node number uniquely
+ * identify a file. Note that Cygwin, MinGW and other Windows POSIX
+ * environments have the stat function (which fakes inodes)
+ * and will use the 'device + inodes' scheme as opposed to the
+ * Windows code further below.
+ */
+ dev_t device; /* file device number */
+ ino_t inode; /* file i-node number */
+
+#else
+ DWORD nFileIndexLow;
+ DWORD nFileIndexHigh;
+ DWORD dwVolumeSerialNumber;
+
+#endif
MPI_File f; /*MPIO file handle */
MPI_Comm comm; /*communicator */
MPI_Info info; /*file information */
@@ -78,8 +96,9 @@ static void *H5FD__mpio_fapl_get(H5FD_t *_file);
static void *H5FD__mpio_fapl_copy(const void *_old_fa);
static herr_t H5FD__mpio_fapl_free(void *_fa);
static H5FD_t *H5FD__mpio_open(const char *name, unsigned flags, hid_t fapl_id,
- haddr_t maxaddr);
+ haddr_t maxaddr);
static herr_t H5FD__mpio_close(H5FD_t *_file);
+static int H5FD__mpio_cmp(const H5FD_t *_f1, const H5FD_t *_f2);
static herr_t H5FD__mpio_query(const H5FD_t *_f1, unsigned long *flags);
static haddr_t H5FD__mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
static herr_t H5FD__mpio_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
@@ -115,7 +134,7 @@ static const H5FD_class_mpi_t H5FD_mpio_g = {
NULL, /*dxpl_free */
H5FD__mpio_open, /*open */
H5FD__mpio_close, /*close */
- NULL, /*cmp */
+ H5FD__mpio_cmp, /*cmp */
H5FD__mpio_query, /*query */
NULL, /*get_type_map */
NULL, /*alloc */
@@ -907,6 +926,87 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_get_mpio_atomicity() */
+/*
+ * Function:    H5FD_mpio_fileinfo_get
+ *
+ * Purpose:     Opens 'name' with an ordinary (non-MPI) open call, records
+ *              the values that identify the underlying file in 'file',
+ *              and closes the descriptor again before returning.
+ *
+ *              On non-Windows builds the cached values are the device
+ *              and i-node numbers reported by HDfstat(). When
+ *              H5_HAVE_WIN32_API is defined they are the volume serial
+ *              number and the high/low file index reported by
+ *              GetFileInformationByHandle().
+ *
+ *              N.B. The file handles returned by the collective MPI
+ *              file open function are not guaranteed to have any relation
+ *              to an actual POSIX file handle. A "normal" open is
+ *              therefore needed to obtain a handle from which the
+ *              information used for file comparisons can be gathered
+ *              (see: H5FD__mpio_cmp). The approach is modelled on
+ *              H5FD_sec2_open.
+ *
+ * Parameters:  name  - path of the file to inspect
+ *              flags - HDF5 access flags; only H5F_ACC_RDWR is examined,
+ *                      to choose between O_RDWR and O_RDONLY
+ *              file  - driver struct that receives the cached values
+ *
+ * Return:      Success:    Non-negative
+ *
+ *              Failure:    Negative
+ *                          The comparison fields of 'file' are then most
+ *                          likely left uninitialized, which in turn can
+ *                          lead to runtime issues, e.g. file comparison
+ *                          failures.
+ */
+static herr_t
+H5FD_mpio_fileinfo_get(const char *name, unsigned flags, H5FD_mpio_t *file)
+{
+#ifdef H5_HAVE_WIN32_API
+    struct _BY_HANDLE_FILE_INFORMATION fileinfo;
+    HANDLE    hFile;                /* Native windows file handle */
+#endif
+    int       fd = -1;              /* File descriptor */
+    int       o_flags;              /* Flags for open() call */
+    h5_stat_t sb;
+    herr_t    ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5FDmpio_DEBUG
+    if (H5FD_mpio_Debug[(int)'t'])
+        fprintf(stdout, "Entering H5FD_mpio_fileinfo_get\n");
+#endif
+
+    o_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY;
+
+    /* Open the file.
+     *
+     * The failures below are operating-system errors, not MPI errors, so
+     * they are reported with HGOTO_ERROR; HMPI_GOTO_ERROR expects a genuine
+     * MPI error code and must not be handed a file descriptor or -1.
+     */
+    if ((fd = HDopen(name, o_flags, H5_POSIX_CREATE_MODE_RW)) < 0)
+        HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "HDopen failed")
+
+    if (HDfstat(fd, &sb) < 0)
+        HGOTO_ERROR(H5E_FILE, H5E_BADFILE, FAIL, "HDfstat failed")
+
+#ifdef H5_HAVE_WIN32_API
+    hFile = (HANDLE)_get_osfhandle(fd);
+    if (INVALID_HANDLE_VALUE == hFile)
+        HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "_get_osfhandle failed")
+
+    if (!GetFileInformationByHandle((HANDLE)hFile, &fileinfo))
+        HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "GetFileInformationByHandle failed")
+
+    file->nFileIndexHigh       = fileinfo.nFileIndexHigh;
+    file->nFileIndexLow        = fileinfo.nFileIndexLow;
+    file->dwVolumeSerialNumber = fileinfo.dwVolumeSerialNumber;
+#else  /* H5_HAVE_WIN32_API */
+    file->device = sb.st_dev;
+    file->inode  = sb.st_ino;
+#endif /* H5_HAVE_WIN32_API */
+
+done:
+    /* The descriptor was only needed to query the file; release it on
+     * both the success and the error path.
+     */
+    if (fd >= 0)
+        HDclose(fd);
+
+#ifdef H5FDmpio_DEBUG
+    if (H5FD_mpio_Debug[(int)'t'])
+        fprintf(stdout, "Leaving H5FD_mpio_fileinfo_get\n");
+#endif
+    FUNC_LEAVE_NOAPI(ret_value)
+}
+
/*-------------------------------------------------------------------------
* Function: H5FD__mpio_open
@@ -1039,6 +1139,11 @@ H5FD__mpio_open(const char *name, unsigned flags, hid_t fapl_id,
file->eof = H5FD_mpi_MPIOff_to_haddr(size);
file->local_eof = file->eof;
+ if (mpi_rank == 0) {
+ /* Gather some file info for future comparisons */
+ if (H5FD_mpio_fileinfo_get( name, flags, file ) < 0)
+ HMPI_GOTO_ERROR(NULL, "H5FD_mpio_fileinfo_get failed", -1)
+ }
/* Set return value */
ret_value = (H5FD_t*)file;
@@ -1064,6 +1169,129 @@ done:
/*-------------------------------------------------------------------------
+ * Function:    H5FD__mpio_cmp
+ *
+ * Purpose:     This version of the 'cmp' function is used to compare two
+ *              files which have been created and opened using the MPI-IO
+ *              driver.
+ *              The peculiarity of this is that, unlike POSIX I/O, the
+ *              handle returned from an MPI_File_open operation may be
+ *              an abstract value and not have any relation to an actual
+ *              filesystem handle.  The net result is that additional
+ *              filesystem information needs to be gathered to subsequently
+ *              utilize the stronger filesystem based methodology used in
+ *              other HDF5 drivers, e.g. H5FD_sec2_cmp().
+ *              The approach is twofold:
+ *                1. The process groups of the MPI communicators used to
+ *                   access the two files are compared.
+ *                2. MPI rank 0 compares the POSIX (device + i-node) or
+ *                   Windows (volume serial number + file index) values
+ *                   cached in the two file structs.  The result is
+ *                   then broadcast over f1's communicator to the
+ *                   participating MPI ranks to effect a global result.
+ *
+ * Return:      An integer value similar to that returned by strcmp()
+ *
+ *              NOTE: This function can't FAIL.  In those cases where
+ *              we would normally return FAILURE, e.g. when MPI
+ *              returns an error, we treat these as unequal comparisons
+ *              and return -1.
+ *
+ * Programmer:  Richard Warren
+ *              Originally borrowed from H5FD_sec2_cmp (Robb Matzke) and
+ *              modified as described above.
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5FD__mpio_cmp(const H5FD_t *_f1, const H5FD_t *_f2)
+{
+    const H5FD_mpio_t *f1           = (const H5FD_mpio_t *)_f1;
+    const H5FD_mpio_t *f2           = (const H5FD_mpio_t *)_f2;
+    MPI_Group          f1_grp       = MPI_GROUP_NULL;
+    MPI_Group          f2_grp       = MPI_GROUP_NULL;
+    int                grp_relation = MPI_UNEQUAL; /* Result of the group comparison */
+    int                file_cmp     = 0;           /* Result of the file comparison  */
+    int                mpi_result;
+    int                ret_value    = 0;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    if ((mpi_result = MPI_Comm_group(f1->comm, &f1_grp)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(-1, "MPI_Comm_group(comm1) failed", mpi_result)
+
+    if ((mpi_result = MPI_Comm_group(f2->comm, &f2_grp)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(-1, "MPI_Comm_group(comm2) failed", mpi_result)
+
+    if ((mpi_result = MPI_Group_compare(f1_grp, f2_grp, &grp_relation)) != MPI_SUCCESS)
+        HMPI_GOTO_ERROR(-1, "MPI_Group_compare failed", mpi_result)
+
+    /* MPI_Group_compare reports one of the following:
+     *   MPI_IDENT   == the two groups have the same members in the
+     *                  same order
+     *   ---------------- Those below can lead to unexpected
+     *   ---------------- results, so we will return unequal
+     *   ---------------- for the file comparison.
+     *   MPI_SIMILAR == the two groups have the same members but
+     *                  the ordering differs
+     *   MPI_UNEQUAL == self descriptive (unequal)
+     *
+     * (MPI_CONGRUENT is only produced when comparing communicators,
+     * never when comparing groups.)  The test is written against
+     * MPI_IDENT so that it does not depend on the numeric values an
+     * MPI implementation assigns to these constants.
+     */
+    if (grp_relation != MPI_IDENT)
+        HGOTO_DONE(-1)
+
+    if (f1->mpi_rank == 0) {
+        /* Because MPI file handles may NOT have any relation to
+         * an actual file handle, we utilize a "regular" file open
+         * on MPI rank 0 in addition to opening with the MPI-IO routines.
+         * The H5FD_mpio_t structure is utilized to cache the
+         * relevant comparison values which we use for comparisons
+         * below.
+         */
+#ifdef H5_HAVE_WIN32_API
+        /* Compare lexicographically: a later field only matters when all
+         * earlier fields are equal, which keeps the ordering consistent.
+         */
+        if (f1->dwVolumeSerialNumber != f2->dwVolumeSerialNumber)
+            file_cmp = (f1->dwVolumeSerialNumber < f2->dwVolumeSerialNumber) ? -1 : 1;
+        else if (f1->nFileIndexHigh != f2->nFileIndexHigh)
+            file_cmp = (f1->nFileIndexHigh < f2->nFileIndexHigh) ? -1 : 1;
+        else if (f1->nFileIndexLow != f2->nFileIndexLow)
+            file_cmp = (f1->nFileIndexLow < f2->nFileIndexLow) ? -1 : 1;
+#else /* Not WIN32 */
+#ifdef H5_DEV_T_IS_SCALAR
+        if (f1->device < f2->device)
+            file_cmp = -1;
+        else if (f1->device > f2->device)
+            file_cmp = 1;
+#else /* H5_DEV_T_IS_SCALAR */
+        /* If dev_t isn't a scalar value on this system, just use memcmp to
+         * determine if the values are the same or not.  The actual return value
+         * shouldn't really matter...
+         */
+        file_cmp = HDmemcmp(&(f1->device), &(f2->device), sizeof(dev_t));
+#endif /* H5_DEV_T_IS_SCALAR */
+        /* Continue checking ONLY if the devices matched */
+        if (0 == file_cmp) {
+            if (f1->inode < f2->inode)
+                file_cmp = -1;
+            else if (f1->inode > f2->inode)
+                file_cmp = 1;
+        }
+#endif /* H5_HAVE_WIN32_API */
+    }
+
+    /* Only rank 0 holds the cached file information, so distribute its
+     * verdict to every rank of f1's communicator.
+     */
+    if (MPI_SUCCESS != (mpi_result = MPI_Bcast(&file_cmp, 1, MPI_INT, 0, f1->comm)))
+        HMPI_GOTO_ERROR(-1, "MPI_Bcast failed", mpi_result)
+
+    ret_value = file_cmp;
+
+done:
+    /* Release the group handles obtained from MPI_Comm_group() */
+    if (MPI_GROUP_NULL != f1_grp) {
+        if (MPI_SUCCESS != (mpi_result = MPI_Group_free(&f1_grp)))
+            HMPI_DONE_ERROR(-1, "MPI_Group_free(comm1) failed", mpi_result)
+    }
+    if (MPI_GROUP_NULL != f2_grp) {
+        if (MPI_SUCCESS != (mpi_result = MPI_Group_free(&f2_grp)))
+            HMPI_DONE_ERROR(-1, "MPI_Group_free(comm2) failed", mpi_result)
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5FD__mpio_cmp() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5FD__mpio_close
*
* Purpose: Closes a file. This is collective.