summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2003-12-29 19:34:11 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2003-12-29 19:34:11 (GMT)
commit5db6c61f18198ac4477a6ba99d405ff82cf467a7 (patch)
tree4cf9ef4d947830f28f12d110148c3bfea6ff49e9
parentb6fd9f9d760dbe4e6ab530b12e0672088570224a (diff)
downloadhdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.zip
hdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.tar.gz
hdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.tar.bz2
[svn-r7993] Purpose:
Code cleanup, bug fixes Description: Wrap up rest of changes necessary for fixing the "short" MPI-I/O read problem that Robb reported. Platforms tested: FreeBSD 4.9 (sleipnir) too minor to require h5committest
-rwxr-xr-xconfigure1
-rw-r--r--configure.in29
-rw-r--r--release_docs/RELEASE.txt3
-rw-r--r--src/H5FDfphdf5.c125
-rw-r--r--src/H5FDmpio.c18
-rw-r--r--src/H5FDmpiposix.c2
-rw-r--r--src/H5FDsec2.c2
-rw-r--r--src/H5FPclient.c4
-rw-r--r--src/H5FPprivate.h2
-rw-r--r--testpar/testphdf5.h1
10 files changed, 89 insertions, 98 deletions
diff --git a/configure b/configure
index 722a680..50cbb5a 100755
--- a/configure
+++ b/configure
@@ -32326,7 +32326,6 @@ fi
rm -f conftest.$ac_objext conftest$ac_exeext conftest.$ac_ext
fi
-
MPE=yes
# Check whether --with-mpe or --without-mpe was given.
diff --git a/configure.in b/configure.in
index 17669ee..c2e9ef8 100644
--- a/configure.in
+++ b/configure.in
@@ -2194,35 +2194,6 @@ if test -n "$PARALLEL"; then
)
fi
-dnl ----------------------------------------------------------------------
-dnl Block the MPI_Get_count code since it does not work
-dnl dnl Check whether MPI_Get_count actually works correctly on this
-dnl dnl platform.
-dnl AC_MSG_CHECKING(whether a MPI_Get_count works correctly)
-dnl AC_TRY_RUN([
-dnl #include <mpi.h>
-dnl
-dnl int main(int argc, char **argv)
-dnl {
-dnl MPI_Status mpi_stat;
-dnl int bytes_read = 0, ret;
-dnl
-dnl MPI_Init(&argc, &argv);
-dnl memset(&mpi_stat, 0, sizeof(MPI_Status)); /* zero out status */
-dnl ret = MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_read);
-dnl MPI_Finalize();
-dnl
-dnl /* this returns TRUE if bytes_read is 0...the shell thinks that the
-dnl * program fails, but we want it to fail of course so switch the
-dnl * "true"/"false" parts of the TRY_RUN macro */
-dnl return bytes_read == 0;
-dnl }
-dnl ],
-dnl AC_MSG_RESULT(no),
-dnl AC_MSG_RESULT(yes)
-dnl CPPFLAGS="$CPPFLAGS -DMPI_GET_COUNT_WORKS",AC_MSG_RESULT(no))
-dnl ----------------------------------------------------------------------
-
dnl --------------------------------------------------------------------
dnl Do we want MPE instrumentation feature on?
dnl
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index ca689d9..c2e23dd 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -95,6 +95,9 @@ Bug Fixes since HDF5-1.6.0 release
-------
- Fixed bug with variable-length datatypes used in compound datatypes.
SLU - 2003/12/29
+ - Fixed bug in parallel I/O routines that would cause reads from
+ "short datasets" (datasets which were only partially written out)
+ to return invalid data. QAK & AKC - 2003/12/19
- Fixed bug where scalar dataspaces for attributes were reporting as
simple dataspaces. QAK - 2003/12/13
- Fixed problem with selection offsets of hyperslab selections in
diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c
index 6ee76f6..8198165 100644
--- a/src/H5FDfphdf5.c
+++ b/src/H5FDfphdf5.c
@@ -1160,14 +1160,11 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
haddr_t addr, size_t size, void *buf)
{
H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file;
- MPI_Offset mpi_off;
MPI_Status status;
- int mrc;
- MPI_Datatype buf_type;
- int size_i;
- int bytes_read;
- int n;
- unsigned use_view_this_time = 0;
+ MPI_Offset mpi_off;
+ int size_i; /* Integer copy of 'size' to read */
+ int bytes_read; /* Number of bytes read in */
+ int mrc; /* MPI return code */
herr_t ret_value = SUCCEED;
FUNC_ENTER_NOAPI(H5FD_fphdf5_read, FAIL)
@@ -1189,14 +1186,18 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr_t to MPI offset")
size_i = (int)size;
-
if ((hsize_t)size_i != size)
HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size_t to int")
- /* Only look for MPI views for raw data transfers */
- if (mem_type == H5FD_MEM_DRAW) {
+ /* Only do real MPI stuff for raw data transfers */
+ if(mem_type==H5FD_MEM_DRAW) {
H5P_genplist_t *plist;
- H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */
+ H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */
+ MPI_Datatype buf_type; /* MPI datatype for I/O operation */
+ int n;
+ int type_size; /* MPI datatype used for I/O's size */
+ int io_size; /* Actual number of bytes requested */
+ unsigned use_view_this_time = 0; /* Flag to indicate we are using an MPI view */
/* Obtain the data transfer properties */
if ((plist = H5I_object(dxpl_id)) == NULL)
@@ -1239,6 +1240,13 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
if ((mrc = MPI_File_read_at_all(file->f, mpi_off, buf, size_i,
buf_type, &status)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mrc)
+
+ /*
+ * Reset the file view when we used MPI derived types
+ */
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
+ H5FD_mpio_native, file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
} else {
/*
* Prepare for a simple xfer of a contiguous block of bytes. The
@@ -1252,21 +1260,33 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc)
}
- /*
- * MPI_Get_count incorrectly returns negative count; fake a complete
- * read.
+ /* How many bytes were actually read? */
+ /* [This works because the "basic elements" we use for all our MPI derived
+ * types are MPI_BYTE. We should be using the 'buf_type' for the MPI
+ * datatype in this call though... (We aren't because using it causes
+ * the LANL "qsc" machine to dump core - 12/19/03) - QAK]
*/
- bytes_read = size_i;
+ if (MPI_SUCCESS != (mrc=MPI_Get_elements(&status, MPI_BYTE, &bytes_read)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mrc)
+
+ /* Get the type's size */
+ if (MPI_SUCCESS != (mrc=MPI_Type_size(buf_type,&type_size)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mrc)
+
+ /* Compute the actual number of bytes requested */
+ io_size=type_size*size_i;
+
+ /* Check for read failure */
+ if (bytes_read<0 || bytes_read>io_size)
+ HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
/*
- * Reset the file view when we used MPI derived types
+ * This gives us zeroes beyond end of physical MPI file.
*/
- if (use_view_this_time)
- if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
- H5FD_mpio_native, file->info)) != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
-
- } else {
+ if ((n=(io_size-bytes_read)) > 0)
+ HDmemset((char*)buf+bytes_read, 0, (size_t)n);
+ } /* end if */
+ else {
/*
* This is metadata - we want to try to read it from the SAP
* first.
@@ -1276,7 +1296,7 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id,
if (H5FP_request_read_metadata(_file, file->file_id, dxpl_id, mem_type,
addr, size, (uint8_t**)&buf,
- &bytes_read, &req_id, &sap_status) != SUCCEED) {
+ &req_id, &sap_status) != SUCCEED) {
/* FIXME: The read failed, for some reason */
HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__);
}
@@ -1297,27 +1317,6 @@ HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__);
}
} /* end else */
- /* Check for read failure */
- if (bytes_read < 0 || bytes_read > size_i)
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
-
- /*
- * This gives us zeroes beyond end of physical MPI file. What about
- * reading past logical end of HDF5 file???
- */
- n = size_i - bytes_read;
-
- if (n > 0) {
- if (use_view_this_time)
- /*
- * INCOMPLETE rky 1998-09-18
- * Haven't implemented reading zeros beyond EOF. What to do???
- */
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed")
-
- memset((char*)buf + bytes_read, 0, (size_t)n);
- }
-
done:
FUNC_LEAVE_NOAPI(ret_value)
}
@@ -1440,6 +1439,8 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist
MPI_Offset mpi_off;
int mrc;
int bytes_written;
+ int type_size; /* MPI datatype used for I/O's size */
+ int io_size; /* Actual number of bytes requested */
unsigned use_view_this_time = 0;
herr_t ret_value = SUCCEED;
@@ -1504,28 +1505,38 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist
if ((mrc = MPI_File_write_at_all(file->f, mpi_off, (void*)buf,
size, buf_type, &status)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mrc)
- } else {
+
+ /* Reset the file view when we used MPI derived types */
+ if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
+ H5FD_mpio_native, file->info)) != MPI_SUCCESS)
+ HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
+ } /* end if */
+ else {
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf,
size, buf_type, &status)) != MPI_SUCCESS)
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mrc)
}
- /* Reset the file view when we used MPI derived types */
- if (use_view_this_time)
- if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE,
- H5FD_mpio_native, file->info)) != MPI_SUCCESS)
- HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc)
-
- /*
- * MPI_Get_count incorrectly returns negative count; fake a complete
- * write (use size for both parameters).
+ /* How many bytes were actually written? */
+ /* [This works because the "basic elements" we use for all our MPI derived
+ * types are MPI_BYTE. We should be using the 'buf_type' for the MPI
+ * datatype in this call though... (We aren't because using it causes
+ * the LANL "qsc" machine to dump core - 12/19/03) - QAK]
*/
- bytes_written = size;
+ if (MPI_SUCCESS != (mrc=MPI_Get_elements(&status, MPI_BYTE, &bytes_written)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mrc)
+
+ /* Get the type's size */
+ if (MPI_SUCCESS != (mrc=MPI_Type_size(buf_type,&type_size)))
+ HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mrc)
+
+ /* Compute the actual number of bytes requested */
+ io_size=type_size*size;
/* Check for write failure */
- if (bytes_written < 0 || bytes_written > size)
- HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed")
+ if (bytes_written!=io_size)
+ HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed")
/* Forget the EOF value (see H5FD_fphdf5_get_eof()) */
file->eof = HADDR_UNDEF;
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 1b0f461..61d97da 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -40,8 +40,7 @@
/*
* The driver identification number, initialized at runtime if H5_HAVE_PARALLEL
* is defined. This allows applications to still have the H5FD_MPIO
- * "constants" in their source code (it also makes this file strictly ANSI
- * compliant when H5_HAVE_PARALLEL isn't defined)
+ * "constants" in their source code.
*/
static hid_t H5FD_MPIO_g = 0;
@@ -1323,6 +1322,10 @@ done:
* to HADDR_UNDEF which is the error return value of this
* function.
*
+ * Keeping the EOF updated (during write calls) is expensive
+ * because any process may extend the physical end of the
+ * file. -QAK
+ *
* Return: Success: The end-of-address marker.
*
* Failure: HADDR_UNDEF
@@ -1563,7 +1566,9 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
/* How many bytes were actually read? */
/* [This works because the "basic elements" we use for all our MPI derived
- * types are MPI_BYTE - QAK]
+ * types are MPI_BYTE. We should be using the 'buf_type' for the MPI
+ * datatype in this call though... (We aren't because using it causes
+ * the LANL "qsc" machine to dump core - 12/19/03) - QAK]
*/
if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
@@ -1879,7 +1884,9 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
/* How many bytes were actually written? */
/* [This works because the "basic elements" we use for all our MPI derived
- * types are MPI_BYTE - QAK]
+ * types are MPI_BYTE. We should be using the 'buf_type' for the MPI
+ * datatype in this call though... (We aren't because using it causes
+ * the LANL "qsc" machine to dump core - 12/19/03) - QAK]
*/
if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
@@ -1903,7 +1910,7 @@ done:
/* if only p<round> writes, need to broadcast the ret_value to other processes */
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
- HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
+ HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
} /* end if */
#endif /* OLD_METADATA_WRITE */
@@ -1912,6 +1919,7 @@ done:
fprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n",
file->mpi_rank, ret_value );
#endif
+
FUNC_LEAVE_NOAPI(ret_value)
}
diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c
index 647fb35..4dadb1b 100644
--- a/src/H5FDmpiposix.c
+++ b/src/H5FDmpiposix.c
@@ -1209,7 +1209,7 @@ H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id,
if (0==nbytes) {
/* end of file but not end of format address space */
HDmemset(buf, 0, size);
- size = 0;
+ break;
} /* end if */
assert(nbytes>=0);
assert((size_t)nbytes<=size);
diff --git a/src/H5FDsec2.c b/src/H5FDsec2.c
index 5b73475..5924c0a 100644
--- a/src/H5FDsec2.c
+++ b/src/H5FDsec2.c
@@ -682,7 +682,7 @@ H5FD_sec2_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id, hadd
if (0==nbytes) {
/* end of file but not end of format address space */
HDmemset(buf, 0, size);
- size = 0;
+ break;
}
assert(nbytes>=0);
assert((size_t)nbytes<=size);
diff --git a/src/H5FPclient.c b/src/H5FPclient.c
index e80288d..cdc22e0 100644
--- a/src/H5FPclient.c
+++ b/src/H5FPclient.c
@@ -286,7 +286,7 @@ done:
herr_t
H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id,
H5FD_mem_t UNUSED mem_type, haddr_t addr,
- size_t size, uint8_t **buf, int *bytes_read,
+ size_t size, uint8_t **buf,
unsigned *req_id, H5FP_status_t *status)
{
H5FP_request_t req;
@@ -300,7 +300,6 @@ H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id,
/* check args */
assert(file);
assert(buf);
- assert(bytes_read);
assert(req_id);
assert(status);
@@ -352,7 +351,6 @@ HDfprintf(stderr, "Buffer not big enough to hold metadata!!!!\n");
assert(0);
}
- *bytes_read = size;
break;
case H5FP_STATUS_DUMPING:
/*
diff --git a/src/H5FPprivate.h b/src/H5FPprivate.h
index 0abd9cf..aa85969 100644
--- a/src/H5FPprivate.h
+++ b/src/H5FPprivate.h
@@ -305,7 +305,7 @@ extern herr_t H5FP_request_release_lock(unsigned sap_file_id, hobj_ref_t oid,
H5FP_status_t *status);
extern herr_t H5FP_request_read_metadata(H5FD_t *file, unsigned sap_file_id, hid_t dxpl_id,
H5FD_mem_t mem_type, haddr_t addr,
- size_t size, uint8_t **buf, int *bytes_read,
+ size_t size, uint8_t **buf,
unsigned *req_id, H5FP_status_t *status);
extern herr_t H5FP_request_write_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id,
H5FD_mem_t mem_type, haddr_t addr,
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index 711d189..bb5164a 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -187,6 +187,7 @@ void extend_readInd(char *filename);
void extend_readAll(char *filename);
void compact_dataset(char *filename);
void big_dataset(const char *filename);
+void short_dataset(const char *filename);
int dataset_vrfy(hssize_t start[], hsize_t count[], hsize_t stride[],
hsize_t block[], DATATYPE *dataset, DATATYPE *original);