diff options
Diffstat (limited to 'src/H5FDmpio.c')
-rw-r--r-- | src/H5FDmpio.c | 232 |
1 files changed, 230 insertions, 2 deletions
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 3ab90aa..614c7ba 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -59,6 +59,24 @@ static char H5FD_mpi_native_g[] = "native"; */ typedef struct H5FD_mpio_t { H5FD_t pub; /*public stuff, must be first */ + +/* For comparisons */ +#ifndef H5_HAVE_WIN32_API + /* On most systems the combination of device and i-node number uniquely + * identify a file. Note that Cygwin, MinGW and other Windows POSIX + * environments have the stat function (which fakes inodes) + * and will use the 'device + inodes' scheme as opposed to the + * Windows code further below. + */ + dev_t device; /* file device number */ + ino_t inode; /* file i-node number */ + +#else + DWORD nFileIndexLow; + DWORD nFileIndexHigh; + DWORD dwVolumeSerialNumber; + +#endif MPI_File f; /*MPIO file handle */ MPI_Comm comm; /*communicator */ MPI_Info info; /*file information */ @@ -78,8 +96,9 @@ static void *H5FD__mpio_fapl_get(H5FD_t *_file); static void *H5FD__mpio_fapl_copy(const void *_old_fa); static herr_t H5FD__mpio_fapl_free(void *_fa); static H5FD_t *H5FD__mpio_open(const char *name, unsigned flags, hid_t fapl_id, - haddr_t maxaddr); + haddr_t maxaddr); static herr_t H5FD__mpio_close(H5FD_t *_file); +static int H5FD__mpio_cmp(const H5FD_t *_f1, const H5FD_t *_f2); static herr_t H5FD__mpio_query(const H5FD_t *_f1, unsigned long *flags); static haddr_t H5FD__mpio_get_eoa(const H5FD_t *_file, H5FD_mem_t type); static herr_t H5FD__mpio_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr); @@ -115,7 +134,7 @@ static const H5FD_class_mpi_t H5FD_mpio_g = { NULL, /*dxpl_free */ H5FD__mpio_open, /*open */ H5FD__mpio_close, /*close */ - NULL, /*cmp */ + H5FD__mpio_cmp, /*cmp */ H5FD__mpio_query, /*query */ NULL, /*get_type_map */ NULL, /*alloc */ @@ -907,6 +926,87 @@ done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5FD_get_mpio_atomicity() */ +/* + * Function: H5FD_mpio_fileinfo_get + * + * Purpose: Implements a normal (posix) file open for MPI rank 0. + * Replicates the functionality of H5FD_sec2_open. We + * open the file and cache a few key structures before + * closing. These cached structures are those which + * are eventually utilized for MPIO file comparisons. + * + * N.B. The file handles returned by the collective MPI + * File open function are not guaranteed to have a relation to + * an actual posix file handle. This then, provides the + * requirement that we do a "normal" file open to provide + * an actual file handle with which we can gather more + * detailed information to eventually implement file + * comparisons (see: H5FD__mpio_cmp) + * + * Return: Success: Non-negative + * + * Failure: Negative + * Indicates too, that the information used for + * MPIO file comparisons will most likely not + * be initialized and this in turn can lead to + * runtime issues, e.g. File comparison failures. + */ +static herr_t +H5FD_mpio_fileinfo_get(const char *name, unsigned flags, H5FD_mpio_t *file) +{ + int status; + int fd = -1; /* File descriptor */ + int o_flags; /* Flags for open() call */ + h5_stat_t sb; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "Entering H5FD_mpio_fileinfo_get\n"); +#endif + +#ifdef H5_HAVE_WIN32_API + struct _BY_HANDLE_FILE_INFORMATION fileinfo; + HANDLE hFile; /* Native windows file handle */ +#endif + o_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY; + + /* Open the file */ + if((fd = HDopen(name, o_flags, H5_POSIX_CREATE_MODE_RW)) < 0) + HMPI_GOTO_ERROR(FAIL, "HDopen failed", fd) + + if((status = HDfstat(fd, &sb)) < 0) + HMPI_GOTO_ERROR(FAIL, "HDfstat failed", status) + +#ifdef H5_HAVE_WIN32_API + hFile = (HANDLE)_get_osfhandle(fd); + if(INVALID_HANDLE_VALUE == hFile) + HMPI_GOTO_ERROR(FAIL, "_get_osfhandle failed", -1) + + if(!GetFileInformationByHandle((HANDLE)hFile, &fileinfo)) + HMPI_GOTO_ERROR(FAIL, "GetFileInformationByHandle failed", 0) + + file->nFileIndexHigh = fileinfo.nFileIndexHigh; + file->nFileIndexLow = fileinfo.nFileIndexLow; + file->dwVolumeSerialNumber = fileinfo.dwVolumeSerialNumber; +#else /* H5_HAVE_WIN32_API */ + file->device = sb.st_dev; + file->inode = sb.st_ino; +#endif /* H5_HAVE_WIN32_API */ + +done: + if(fd >= 0) + HDclose(fd); + +#ifdef H5FDmpio_DEBUG + if (H5FD_mpio_Debug[(int)'t']) + fprintf(stdout, "Leaving H5FD_mpio_fileinfo_get\n"); +#endif + FUNC_LEAVE_NOAPI(ret_value) +} + /*------------------------------------------------------------------------- * Function: H5FD__mpio_open @@ -1039,6 +1139,11 @@ H5FD__mpio_open(const char *name, unsigned flags, hid_t fapl_id, file->eof = H5FD_mpi_MPIOff_to_haddr(size); file->local_eof = file->eof; + if (mpi_rank == 0) { + /* Gather some file info for future comparisons */ + if (H5FD_mpio_fileinfo_get( name, flags, file ) < 0) + HMPI_GOTO_ERROR(NULL, "H5FD_mpio_fileinfo_get failed", -1) + } /* Set return value */ ret_value = (H5FD_t*)file; @@ -1064,6 +1169,129 @@ done: /*------------------------------------------------------------------------- + * Function: H5FD__mpio_cmp + * + * Purpose: This version of the 'cmp' function is used to compare two + * files which have been created and opened using the MPI-IO + * driver. + * The peculiarity of this is that unlike POSIX io, the + * handle returned from an MPI_File_open operation may be + * an abstract value and not have any relation to an actual + * filestem handle. The net result is that additional + * filesystem information needs to be gathered to subsequently + * utilize the stronger filesystem based methodology used in + * other HDF5 drivers, e.g. H5FD_sec2_cmp() + * The approach is two fold: + * 1. The MPI communicators used to access parallel files + * will be compared. + * 2. MPI rank 0 is tasked with collecting the additional + * POSIX or Windows NTFS information that is subsequently + * used here for comparison purposes. The result is + * then broadcast to the participating MPI ranks to effect + * a global result. + * + * Return: An integer value similar to that returned by strcmp() + + * NOTE: This function can't FAIL. In those cases where + * where we would normally return FAILURE, e.g. when MPI + * returns an error, we treat these as unequal comparisons. + * + * Programmer: Richard Warren + * Originally borrowed from H5FD_sec2_cmp (Robb Matzke) and + * modified as described above. + * + *------------------------------------------------------------------------- + */ +static int +H5FD__mpio_cmp(const H5FD_t *_f1, const H5FD_t *_f2) +{ + const H5FD_mpio_t *f1 = (const H5FD_mpio_t *)_f1; + const H5FD_mpio_t *f2 = (const H5FD_mpio_t *)_f2; + int ret_value = 0; + int cmp_value = 0; + int mpi_result; + MPI_Group f1_grp; + MPI_Group f2_grp; + + FUNC_ENTER_NOAPI_NOINIT + + if ((mpi_result = MPI_Comm_group(f1->comm, &f1_grp)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(-1, "MPI_Comm_group(comm1) failed", mpi_result) + + if ((mpi_result = MPI_Comm_group(f2->comm, &f2_grp)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(-1, "MPI_Comm_group(comm2) failed", mpi_result) + + if ((mpi_result = MPI_Group_compare(f1_grp, f2_grp, &cmp_value)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(-1, "MPI_Group_compare failed", mpi_result) + + /* The group compare return values can be one of the following: + * MPI_IDENT(0) == two groups/communicators are identical + * ---------------- Those below can lead to unexpected + * ---------------- results, so we will return unequal + * for the file comparison. + * MPI_CONGRUENT(1) == two groups/communicators are equal but + * are distinct communication domains + * MPI_SIMILAR(2) == two groups have the same members but + * ordering may be different + * MPI_UNEQUAL(3) == self descriptive (unequal) + * + * Note: Congruent groups would seem to satisfy the equality + * condition from the file perspective, but there may be conditions + * in which collective operations would cause an application to + * hang if two different communicators are in use, e.g. any + * sort of synchronization (Barrier, Bcast). + */ + + if (cmp_value >= MPI_CONGRUENT) + HGOTO_DONE(-1) + + if (f1->mpi_rank == 0) { + /* Because MPI file handles may NOT have any relation to + * to actual file handle, we utilize a "regular" file open + * on MPI rank 0 prior to opening with the MPI-IO routines. + * The H5FD_mpio_t structure is utilized to cache the + * relevant comparison values which we use for comparisons + * below. + */ +#ifdef H5_HAVE_WIN32_API + if ((f1->dwVolumeSerialNumber < f2->dwVolumeSerialNumber) || + (f1->nFileIndexHigh < f2->nFileIndexHigh) || + (f1->nFileIndexLow < f2->nFileIndexLow)) + cmp_value = -1; + else + if ((f1->dwVolumeSerialNumber > f2->dwVolumeSerialNumber) || + (f1->nFileIndexHigh > f2->nFileIndexHigh) || + (f1->nFileIndexLow > f2->nFileIndexLow)) + cmp_value = 1; +#else /* Not WIN32 */ +#ifdef H5_DEV_T_IS_SCALAR + if (f1->device < f2->device) cmp_value = -1; + else if(f1->device > f2->device) cmp_value = 1; +#else /* H5_DEV_T_IS_SCALAR */ + /* If dev_t isn't a scalar value on this system, just use memcmp to + * determine if the values are the same or not. The actual return value + * shouldn't really matter... + */ + cmp_value = HDmemcmp(&(f1->device), &(f2->device), sizeof(dev_t)); +#endif /* H5_DEV_T_IS_SCALAR */ + /* Continue checking ONLY if we haven't failed yet */ + if (!cmp_value) { + if(f1->inode < f2->inode) cmp_value = -1; + else if(f1->inode > f2->inode) cmp_value = 1; +#endif /* H5_HAVE_WIN32_API */ + } + } + if (MPI_SUCCESS != (mpi_result = MPI_Bcast(&cmp_value, 1, MPI_INT, 0, f1->comm))) + HMPI_GOTO_ERROR(-1, "MPI_Bcast failed", mpi_result) + + ret_value = cmp_value; +done: + + FUNC_LEAVE_NOAPI(ret_value) +} /* end H5FD__mpio_cmp() */ + + +/*------------------------------------------------------------------------- * Function: H5FD__mpio_close * * Purpose: Closes a file. This is collective. |