diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2003-12-29 19:34:11 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2003-12-29 19:34:11 (GMT) |
commit | 5db6c61f18198ac4477a6ba99d405ff82cf467a7 (patch) | |
tree | 4cf9ef4d947830f28f12d110148c3bfea6ff49e9 /src | |
parent | b6fd9f9d760dbe4e6ab530b12e0672088570224a (diff) | |
download | hdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.zip hdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.tar.gz hdf5-5db6c61f18198ac4477a6ba99d405ff82cf467a7.tar.bz2 |
[svn-r7993] Purpose:
Code cleanup, bug fixes
Description:
Wrap up rest of changes necessary for fixing the "short" MPI-I/O read
problem that Robb reported.
Platforms tested:
FreeBSD 4.9 (sleipnir)
too minor to require h5committest
Diffstat (limited to 'src')
-rw-r--r-- | src/H5FDfphdf5.c | 125 | ||||
-rw-r--r-- | src/H5FDmpio.c | 18 | ||||
-rw-r--r-- | src/H5FDmpiposix.c | 2 | ||||
-rw-r--r-- | src/H5FDsec2.c | 2 | ||||
-rw-r--r-- | src/H5FPclient.c | 4 | ||||
-rw-r--r-- | src/H5FPprivate.h | 2 |
6 files changed, 85 insertions, 68 deletions
diff --git a/src/H5FDfphdf5.c b/src/H5FDfphdf5.c index 6ee76f6..8198165 100644 --- a/src/H5FDfphdf5.c +++ b/src/H5FDfphdf5.c @@ -1160,14 +1160,11 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, haddr_t addr, size_t size, void *buf) { H5FD_fphdf5_t *file = (H5FD_fphdf5_t*)_file; - MPI_Offset mpi_off; MPI_Status status; - int mrc; - MPI_Datatype buf_type; - int size_i; - int bytes_read; - int n; - unsigned use_view_this_time = 0; + MPI_Offset mpi_off; + int size_i; /* Integer copy of 'size' to read */ + int bytes_read; /* Number of bytes read in */ + int mrc; /* MPI return code */ herr_t ret_value = SUCCEED; FUNC_ENTER_NOAPI(H5FD_fphdf5_read, FAIL) @@ -1189,14 +1186,18 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from haddr_t to MPI offset") size_i = (int)size; - if ((hsize_t)size_i != size) HGOTO_ERROR(H5E_INTERNAL, H5E_BADRANGE, FAIL, "can't convert from size_t to int") - /* Only look for MPI views for raw data transfers */ - if (mem_type == H5FD_MEM_DRAW) { + /* Only do real MPI stuff for raw data transfers */ + if(mem_type==H5FD_MEM_DRAW) { H5P_genplist_t *plist; - H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */ + H5FD_mpio_xfer_t xfer_mode; /* I/O tranfer mode */ + MPI_Datatype buf_type; /* MPI datatype for I/O operation */ + int n; + int type_size; /* MPI datatype used for I/O's size */ + int io_size; /* Actual number of bytes requested */ + unsigned use_view_this_time = 0; /* Flag to indicate we are using an MPI view */ /* Obtain the data transfer properties */ if ((plist = H5I_object(dxpl_id)) == NULL) @@ -1239,6 +1240,13 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, if ((mrc = MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mrc) + + /* + * Reset the file view when we used MPI derived types + */ + if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpio_native, file->info)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) } else { /* * Prepare for a simple xfer of a contiguous block of bytes. The @@ -1252,21 +1260,33 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mrc) } - /* - * MPI_Get_count incorrectly returns negative count; fake a complete - * read. + /* How many bytes were actually read? */ + /* [This works because the "basic elements" we use for all our MPI derived + * types are MPI_BYTE. We should be using the 'buf_type' for the MPI + * datatype in this call though... (We aren't because using it causes + * the LANL "qsc" machine to dump core - 12/19/03) - QAK] */ - bytes_read = size_i; + if (MPI_SUCCESS != (mrc=MPI_Get_elements(&status, MPI_BYTE, &bytes_read))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mrc) + + /* Get the type's size */ + if (MPI_SUCCESS != (mrc=MPI_Type_size(buf_type,&type_size))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mrc) + + /* Compute the actual number of bytes requested */ + io_size=type_size*size_i; + + /* Check for read failure */ + if (bytes_read<0 || bytes_read>io_size) + HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed") /* - * Reset the file view when we used MPI derived types + * This gives us zeroes beyond end of physical MPI file. */ - if (use_view_this_time) - if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, - H5FD_mpio_native, file->info)) != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) - - } else { + if ((n=(io_size-bytes_read)) > 0) + HDmemset((char*)buf+bytes_read, 0, (size_t)n); + } /* end if */ + else { /* * This is metadata - we want to try to read it from the SAP * first. @@ -1276,7 +1296,7 @@ H5FD_fphdf5_read(H5FD_t *_file, H5FD_mem_t mem_type, hid_t dxpl_id, if (H5FP_request_read_metadata(_file, file->file_id, dxpl_id, mem_type, addr, size, (uint8_t**)&buf, - &bytes_read, &req_id, &sap_status) != SUCCEED) { + &req_id, &sap_status) != SUCCEED) { /* FIXME: The read failed, for some reason */ HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__); } @@ -1297,27 +1317,6 @@ HDfprintf(stderr, "%s:%d: Metadata cache read failed!\n", FUNC, __LINE__); } } /* end else */ - /* Check for read failure */ - if (bytes_read < 0 || bytes_read > size_i) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed") - - /* - * This gives us zeroes beyond end of physical MPI file. What about - * reading past logical end of HDF5 file??? - */ - n = size_i - bytes_read; - - if (n > 0) { - if (use_view_this_time) - /* - * INCOMPLETE rky 1998-09-18 - * Haven't implemented reading zeros beyond EOF. What to do??? - */ - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed") - - memset((char*)buf + bytes_read, 0, (size_t)n); - } - done: FUNC_LEAVE_NOAPI(ret_value) } @@ -1440,6 +1439,8 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist MPI_Offset mpi_off; int mrc; int bytes_written; + int type_size; /* MPI datatype used for I/O's size */ + int io_size; /* Actual number of bytes requested */ unsigned use_view_this_time = 0; herr_t ret_value = SUCCEED; @@ -1504,28 +1505,38 @@ H5FD_fphdf5_write_real(H5FD_t *_file, H5FD_mem_t mem_type, H5P_genplist_t *plist if ((mrc = MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size, buf_type, &status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mrc) - } else { + + /* Reset the file view when we used MPI derived types */ + if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, + H5FD_mpio_native, file->info)) != MPI_SUCCESS) + HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) + } /* end if */ + else { /*OKAY: CAST DISCARDS CONST QUALIFIER*/ if ((mrc = MPI_File_write_at(file->f, mpi_off, (void*)buf, size, buf_type, &status)) != MPI_SUCCESS) HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mrc) } - /* Reset the file view when we used MPI derived types */ - if (use_view_this_time) - if ((mrc = MPI_File_set_view(file->f, (MPI_Offset)0, MPI_BYTE, MPI_BYTE, - H5FD_mpio_native, file->info)) != MPI_SUCCESS) - HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mrc) - - /* - * MPI_Get_count incorrectly returns negative count; fake a complete - * write (use size for both parameters). + /* How many bytes were actually written? */ + /* [This works because the "basic elements" we use for all our MPI derived + * types are MPI_BYTE. We should be using the 'buf_type' for the MPI + * datatype in this call though... (We aren't because using it causes + * the LANL "qsc" machine to dump core - 12/19/03) - QAK] */ - bytes_written = size; + if (MPI_SUCCESS != (mrc=MPI_Get_elements(&status, MPI_BYTE, &bytes_written))) + HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mrc) + + /* Get the type's size */ + if (MPI_SUCCESS != (mrc=MPI_Type_size(buf_type,&type_size))) + HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mrc) + + /* Compute the actual number of bytes requested */ + io_size=type_size*size; /* Check for write failure */ - if (bytes_written < 0 || bytes_written > size) - HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed") + if (bytes_written!=io_size) + HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed") /* Forget the EOF value (see H5FD_fphdf5_get_eof()) */ file->eof = HADDR_UNDEF; diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c index 1b0f461..61d97da 100644 --- a/src/H5FDmpio.c +++ b/src/H5FDmpio.c @@ -40,8 +40,7 @@ /* * The driver identification number, initialized at runtime if H5_HAVE_PARALLEL * is defined. This allows applications to still have the H5FD_MPIO - * "constants" in their source code (it also makes this file strictly ANSI - * compliant when H5_HAVE_PARALLEL isn't defined) + * "constants" in their source code. */ static hid_t H5FD_MPIO_g = 0; @@ -1323,6 +1322,10 @@ done: * to HADDR_UNDEF which is the error return value of this * function. * + * Keeping the EOF updated (during write calls) is expensive + * because any process may extend the physical end of the + * file. -QAK + * * Return: Success: The end-of-address marker. * * Failure: HADDR_UNDEF @@ -1563,7 +1566,9 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add /* How many bytes were actually read? */ /* [This works because the "basic elements" we use for all our MPI derived - * types are MPI_BYTE - QAK] + * types are MPI_BYTE. We should be using the 'buf_type' for the MPI + * datatype in this call though... (We aren't because using it causes + * the LANL "qsc" machine to dump core - 12/19/03) - QAK] */ if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read))) HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) @@ -1879,7 +1884,9 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, /* How many bytes were actually written? */ /* [This works because the "basic elements" we use for all our MPI derived - * types are MPI_BYTE - QAK] + * types are MPI_BYTE. We should be using the 'buf_type' for the MPI + * datatype in this call though... (We aren't because using it causes + * the LANL "qsc" machine to dump core - 12/19/03) - QAK] */ if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written))) HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code) @@ -1903,7 +1910,7 @@ done: /* if only p<round> writes, need to broadcast the ret_value to other processes */ if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) { if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm))) - HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code) + HMPI_DONE_ERROR(FAIL, "MPI_Bcast failed", mpi_code) } /* end if */ #endif /* OLD_METADATA_WRITE */ @@ -1912,6 +1919,7 @@ done: fprintf(stdout, "proc %d: Leaving H5FD_mpio_write with ret_value=%d\n", file->mpi_rank, ret_value ); #endif + FUNC_LEAVE_NOAPI(ret_value) } diff --git a/src/H5FDmpiposix.c b/src/H5FDmpiposix.c index 647fb35..4dadb1b 100644 --- a/src/H5FDmpiposix.c +++ b/src/H5FDmpiposix.c @@ -1209,7 +1209,7 @@ H5FD_mpiposix_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id, if (0==nbytes) { /* end of file but not end of format address space */ HDmemset(buf, 0, size); - size = 0; + break; } /* end if */ assert(nbytes>=0); assert((size_t)nbytes<=size); diff --git a/src/H5FDsec2.c b/src/H5FDsec2.c index 5b73475..5924c0a 100644 --- a/src/H5FDsec2.c +++ b/src/H5FDsec2.c @@ -682,7 +682,7 @@ H5FD_sec2_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t UNUSED dxpl_id, hadd if (0==nbytes) { /* end of file but not end of format address space */ HDmemset(buf, 0, size); - size = 0; + break; } assert(nbytes>=0); assert((size_t)nbytes<=size); diff --git a/src/H5FPclient.c b/src/H5FPclient.c index e80288d..cdc22e0 100644 --- a/src/H5FPclient.c +++ b/src/H5FPclient.c @@ -286,7 +286,7 @@ done: herr_t H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id, H5FD_mem_t UNUSED mem_type, haddr_t addr, - size_t size, uint8_t **buf, int *bytes_read, + size_t size, uint8_t **buf, unsigned *req_id, H5FP_status_t *status) { H5FP_request_t req; @@ -300,7 +300,6 @@ H5FP_request_read_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id, /* check args */ assert(file); assert(buf); - assert(bytes_read); assert(req_id); assert(status); @@ -352,7 +351,6 @@ HDfprintf(stderr, "Buffer not big enough to hold metadata!!!!\n"); assert(0); } - *bytes_read = size; break; case H5FP_STATUS_DUMPING: /* diff --git a/src/H5FPprivate.h b/src/H5FPprivate.h index 0abd9cf..aa85969 100644 --- a/src/H5FPprivate.h +++ b/src/H5FPprivate.h @@ -305,7 +305,7 @@ extern herr_t H5FP_request_release_lock(unsigned sap_file_id, hobj_ref_t oid, H5FP_status_t *status); extern herr_t H5FP_request_read_metadata(H5FD_t *file, unsigned sap_file_id, hid_t dxpl_id, H5FD_mem_t mem_type, haddr_t addr, - size_t size, uint8_t **buf, int *bytes_read, + size_t size, uint8_t **buf, unsigned *req_id, H5FP_status_t *status); extern herr_t H5FP_request_write_metadata(H5FD_t *file, unsigned file_id, hid_t dxpl_id, H5FD_mem_t mem_type, haddr_t addr, |