summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2002-06-18 14:02:17 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2002-06-18 14:02:17 (GMT)
commit972707dcd3a123df1435d35d6b6a585222a1b6df (patch)
tree973d2485c9b9c46ad394903ad284b86cdf4b007e
parentad9ca47cecd23223ad8dd58d9e73193fdcf8cf76 (diff)
downloadhdf5-972707dcd3a123df1435d35d6b6a585222a1b6df.zip
hdf5-972707dcd3a123df1435d35d6b6a585222a1b6df.tar.gz
hdf5-972707dcd3a123df1435d35d6b6a585222a1b6df.tar.bz2
[svn-r5660] Purpose:
Code optimization. Description: Avoid creating MPI derived types (and thus requiring an MPI_File_set_view() call) when contiguous selections are used for dataset I/O. This should be a performance improvement for those sorts of selections. Platforms tested: Linux 2.2.x (eirene) w/parallel && IRIX64 6.5 (modi4) w/parallel & FORTRAN
-rw-r--r--release_docs/RELEASE.txt8
-rw-r--r--src/H5FDmpio.c76
-rw-r--r--src/H5FDmpio.h3
-rw-r--r--src/H5Smpio.c285
4 files changed, 242 insertions, 130 deletions
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 4ed8cbe..586d1e4 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -181,11 +181,15 @@ Documentation
New Features
============
+ * Changed MPI I/O routines to avoid creating MPI derived types (and thus
+ needing to set the file view) for contiguous selections within datasets,
+ which should result in some performance improvement for those types of
+ selections. QAK - 2002/06/18
* Enable MPI type support for collective I/O to be enabled by default.
This can be disabled by setting the HDF5_MPI_OPT_TYPES environment
variable to the value "0". QAK - 2002/06/14
- * Allow chunks in chunked datasets to be cached when file is opened for
- read-only access (bug #709). QAK - 2002/06/10
+ * Allow chunks in chunked datasets to be cached when parallel file is
+ opened for read-only access (bug #709). QAK - 2002/06/10
* Added internal "small data" aggregation, which can reduce the number of
actual I/O calls made, improving performance. QAK - 2002/06/05
* Improved internal metadata aggregation, which can reduce the number of
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 2c4a08b..3e9d200 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -56,7 +56,7 @@ typedef struct H5FD_mpio_t {
haddr_t eof; /*end-of-file marker */
haddr_t eoa; /*end-of-address marker */
haddr_t last_eoa; /* Last known end-of-address marker */
- int old_use_types; /*remember value of use_types */
+ unsigned old_use_view; /*remember value of use_view */
} H5FD_mpio_t;
/* Prototypes */
@@ -150,14 +150,14 @@ static int interface_initialize_g = 0;
/* ======== Temporary, Local data transfer properties ======== */
/* Definitions for memory MPI type property */
-#define H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME "H5FD_mpio_mem_mpi_type"
-#define H5FD_MPIO_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype)
+#define H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME "H5FD_mpio_mem_mpi_type"
+#define H5FD_MPIO_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype)
/* Definitions for file MPI type property */
-#define H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME "H5FD_mpio_file_mpi_type"
-#define H5FD_MPIO_XFER_FILE_MPI_TYPE_SIZE sizeof(MPI_Datatype)
+#define H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME "H5FD_mpio_file_mpi_type"
+#define H5FD_MPIO_XFER_FILE_MPI_TYPE_SIZE sizeof(MPI_Datatype)
/* Definitions for whether to use MPI types property */
-#define H5FD_MPIO_XFER_USE_MPI_TYPES_NAME "H5FD_mpio_use_mpi_type"
-#define H5FD_MPIO_XFER_USE_MPI_TYPES_SIZE 0
+#define H5FD_MPIO_XFER_USE_VIEW_NAME "H5FD_mpio_use_view"
+#define H5FD_MPIO_XFER_USE_VIEW_SIZE sizeof(unsigned)
/*-------------------------------------------------------------------------
@@ -537,7 +537,7 @@ H5FD_mpio_mpi_size(H5FD_t *_file)
*-------------------------------------------------------------------------
*/
herr_t
-H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype)
+H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype, unsigned use_view)
{
H5P_genplist_t *plist; /* Property list pointer */
@@ -555,8 +555,8 @@ H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype)
if(H5P_insert(plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME,H5FD_MPIO_XFER_FILE_MPI_TYPE_SIZE,&ftype,NULL,NULL,NULL,NULL,NULL)<0)
HRETURN_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
- /* Set 'use types' flag property */
- if(H5P_insert(plist,H5FD_MPIO_XFER_USE_MPI_TYPES_NAME,H5FD_MPIO_XFER_USE_MPI_TYPES_SIZE,NULL,NULL,NULL,NULL,NULL,NULL)<0)
+ /* Set 'use view' property */
+ if(H5P_insert(plist,H5FD_MPIO_XFER_USE_VIEW_NAME,H5FD_MPIO_XFER_USE_VIEW_SIZE,&use_view,NULL,NULL,NULL,NULL,NULL)<0)
HRETURN_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert MPI-I/O property");
FUNC_LEAVE(SUCCEED);
@@ -597,8 +597,8 @@ H5FD_mpio_teardown(hid_t dxpl_id)
if(H5P_remove(dxpl_id,plist,H5FD_MPIO_XFER_FILE_MPI_TYPE_NAME)<0)
HRETURN_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property");
- /* Remove 'use types' flag property */
- if(H5P_remove(dxpl_id,plist,H5FD_MPIO_XFER_USE_MPI_TYPES_NAME)<0)
+ /* Remove 'use view' property */
+ if(H5P_remove(dxpl_id,plist,H5FD_MPIO_XFER_USE_VIEW_NAME)<0)
HRETURN_ERROR(H5E_PLIST, H5E_CANTDELETE, FAIL, "can't remove MPI-I/O property");
FUNC_LEAVE(SUCCEED);
@@ -1192,7 +1192,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int size_i, bytes_read, n;
- int use_types_this_time, used_types_last_time;
+ unsigned use_view_this_time=0, used_view_last_time;
H5P_genplist_t *plist; /* Property list pointer */
herr_t ret_value=SUCCEED;
@@ -1237,14 +1237,15 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
/*
* Set up for a fancy xfer using complex types, or single byte block. We
- * wouldn't need to rely on the use_types field if MPI semantics allowed
+ * wouldn't need to rely on the use_view field if MPI semantics allowed
* us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
* could mean "use MPI_BYTE" by convention).
*/
- if((use_types_this_time=H5P_exist_plist(plist,H5FD_MPIO_XFER_USE_MPI_TYPES_NAME))<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
+ if(H5P_exist_plist(plist,H5FD_MPIO_XFER_USE_VIEW_NAME)>0)
+ if(H5P_get(plist,H5FD_MPIO_XFER_USE_VIEW_NAME,&use_view_this_time)<0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- if (use_types_this_time) {
+ if (use_view_this_time) {
/* prepare for a full-blown xfer using btype, ftype, and disp */
if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
@@ -1270,20 +1271,16 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
* Don't bother to reset the view if we're not using the types this time,
* and we didn't use them last time either.
*/
- used_types_last_time = file->old_use_types;
- if (used_types_last_time || /* change to new ftype or MPI_BYTE */
- use_types_this_time) { /* almost certainly a different ftype */
+ used_view_last_time = file->old_use_view;
+ if (used_view_last_time || /* change to new ftype or MPI_BYTE */
+ use_view_this_time) { /* almost certainly a different ftype */
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
}
- /*
- * We always set the use_types flag to 0 because the default is not to
- * use types next time, unless someone explicitly requests it by setting
- * this flag to !=0.
- */
- file->old_use_types = use_types_this_time;
+ /* Keep the 'use view' flag around for the next I/O */
+ file->old_use_view = use_view_this_time;
/* Read the data. */
assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
@@ -1314,7 +1311,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
/* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
* had the 'buf_type' set to MPI_BYTE -QAK
*/
- if(use_types_this_time) {
+ if(use_view_this_time) {
/* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
* If this gets fixed (and MPI_Get_count() is reliable), the
* kludge below where the 'bytes_read' value from MPI_Get_count() is
@@ -1350,7 +1347,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
* reading past logical end of HDF5 file???
*/
if ((n=(size_i-bytes_read)) > 0) {
- if (use_types_this_time) {
+ if (use_view_this_time) {
/*
* INCOMPLETE rky 1998-09-18
* Haven't implemented reading zeros beyond EOF. What to do???
@@ -1488,7 +1485,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int size_i, bytes_written;
- int use_types_this_time, used_types_last_time;
+ unsigned use_view_this_time=0, used_view_last_time;
H5P_genplist_t *plist; /* Property list pointer */
herr_t ret_value=SUCCEED;
@@ -1533,14 +1530,15 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
/*
* Set up for a fancy xfer using complex types, or single byte block. We
- * wouldn't need to rely on the use_types field if MPI semantics allowed
+ * wouldn't need to rely on the use_view field if MPI semantics allowed
* us to test that btype=ftype=MPI_BYTE (or even MPI_TYPE_NULL, which
* could mean "use MPI_BYTE" by convention).
*/
- if((use_types_this_time=H5P_exist_plist(plist,H5FD_MPIO_XFER_USE_MPI_TYPES_NAME))<0)
- HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
+ if(H5P_exist_plist(plist,H5FD_MPIO_XFER_USE_VIEW_NAME)>0)
+ if(H5P_get(plist,H5FD_MPIO_XFER_USE_VIEW_NAME,&use_view_this_time)<0)
+ HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
- if (use_types_this_time) {
+ if (use_view_this_time) {
/* prepare for a full-blown xfer using btype, ftype, and disp */
if(H5P_get(plist,H5FD_MPIO_XFER_MEM_MPI_TYPE_NAME,&buf_type)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI-I/O type property");
@@ -1566,20 +1564,20 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
* Don't bother to reset the view if we're not using the types this time,
* and we didn't use them last time either.
*/
- used_types_last_time = file->old_use_types;
- if (used_types_last_time || /* change to new ftype or MPI_BYTE */
- use_types_this_time) { /* almost certainly a different ftype */
+ used_view_last_time = file->old_use_view;
+ if (used_view_last_time || /* change to new ftype or MPI_BYTE */
+ use_view_this_time) { /* almost certainly a different ftype */
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
}
/*
- * We always set the use_types flag to 0 because the default is not to
+ * Keep the 'use view' flag around for the next I/O; the default is not to
* use types next time, unless someone explicitly requests it by setting
* this flag to !=0.
*/
- file->old_use_types = use_types_this_time;
+ file->old_use_view = use_view_this_time;
/* Only p<round> will do the actual write if all procs in comm write same data */
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
@@ -1626,7 +1624,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
/* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
* had the 'buf_type' set to MPI_BYTE -QAK
*/
- if(use_types_this_time) {
+ if(use_view_this_time) {
/* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
* If this gets fixed (and MPI_Get_count() is reliable), the
* kludge below where the 'bytes_written' value from MPI_Get_count() is
diff --git a/src/H5FDmpio.h b/src/H5FDmpio.h
index 1997788..db85bb2 100644
--- a/src/H5FDmpio.h
+++ b/src/H5FDmpio.h
@@ -57,7 +57,8 @@ __DLL__ herr_t H5Pget_fapl_mpio(hid_t fapl_id, MPI_Comm *comm/*out*/,
__DLL__ herr_t H5Pset_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t xfer_mode);
__DLL__ herr_t H5Pget_dxpl_mpio(hid_t dxpl_id, H5FD_mpio_xfer_t *xfer_mode/*out*/);
__DLL__ MPI_Comm H5FD_mpio_communicator(H5FD_t *_file);
-__DLL__ herr_t H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype, MPI_Datatype ftype);
+__DLL__ herr_t H5FD_mpio_setup(hid_t dxpl_id, MPI_Datatype btype,
+ MPI_Datatype ftype, unsigned use_view);
__DLL__ herr_t H5FD_mpio_teardown(hid_t dxpl_id);
__DLL__ herr_t H5FD_mpio_wait_for_left_neighbor(H5FD_t *file);
__DLL__ herr_t H5FD_mpio_signal_right_neighbor(H5FD_t *file);
diff --git a/src/H5Smpio.c b/src/H5Smpio.c
index 3d5e136..60401f6 100644
--- a/src/H5Smpio.c
+++ b/src/H5Smpio.c
@@ -13,15 +13,14 @@
#define H5F_PACKAGE /*suppress error about including H5Fpkg */
#define H5S_PACKAGE /*suppress error about including H5Spkg */
-#include "H5private.h"
-#include "H5Eprivate.h"
-#include "H5Fpkg.h" /* Ugly, but necessary for the MPIO I/O accesses */
-#include "H5FDprivate.h" /* Necessary for the H5FD_write & H5FD_read prototypes.. */
+#include "H5private.h" /* Internal types, etc. */
+#include "H5Eprivate.h" /* Error reporting */
+#include "H5Fpkg.h" /* Ugly, but necessary for the MPIO I/O accesses */
+#include "H5FDmpio.h" /* MPIO file driver */
+#include "H5FDprivate.h" /* Necessary for the H5FD_write & H5FD_read prototypes.. */
#include "H5Iprivate.h" /* Object IDs */
#include "H5Pprivate.h" /* Property Lists */
-#include "H5Spkg.h"
-
-#include "H5FDmpio.h" /*the MPIO file driver */
+#include "H5Spkg.h" /* Dataspaces */
#ifndef H5_HAVE_PARALLEL
/*
@@ -45,22 +44,35 @@ H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type );
static herr_t
H5S_mpio_hyper_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
+ hbool_t *is_derived_type );
+static herr_t
+H5S_mpio_hyper_contig_type( const H5S_t *space, const size_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type );
static herr_t
H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type );
static herr_t
-H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
- H5P_genplist_t *dc_plist, size_t elmt_size,
+H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout, size_t elmt_size,
const H5S_t *file_space, const H5S_t *mem_space,
hid_t dxpl_id, void *buf/*out*/, const hbool_t do_write);
@@ -74,12 +86,17 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
* Outputs: *new_type the MPI type corresponding to the selection
* *count how many objects of the new_type in selection
* (useful if this is the buffer type for xfer)
+ * *extra_offset Number of bytes of offset within dataset
+ * *use_view 0 if view not needed, 1 if needed
* *is_derived_type 0 if MPI primitive type, 1 if derived
*
* Programmer: rky 980813
*
* Modifications:
*
+ * Quincey Koziol, June 18, 2002
+ * Added 'extra_offset' and 'use_view' parameters
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -87,6 +104,8 @@ H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type )
{
hsize_t total_bytes;
@@ -104,8 +123,9 @@ H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
/* fill in the return values */
*new_type = MPI_BYTE;
- H5_CHECK_OVERFLOW(total_bytes, hsize_t, size_t);
- *count = (size_t)total_bytes;
+ H5_ASSIGN_OVERFLOW(*count, total_bytes, hsize_t, size_t);
+ *extra_offset = 0;
+ *use_view = 0;
*is_derived_type = 0;
#ifdef H5Smpi_DEBUG
@@ -125,6 +145,8 @@ H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
* Outputs: *new_type the MPI type corresponding to the selection
* *count how many objects of the new_type in selection
* (useful if this is the buffer type for xfer)
+ * *extra_offset Number of bytes of offset within dataset
+ * *use_view 0 if view not needed, 1 if needed
* *is_derived_type 0 if MPI primitive type, 1 if derived
*
* Programmer: rky 980813
@@ -136,6 +158,10 @@ H5S_mpio_all_type( const H5S_t *space, const size_t elmt_size,
* akc, rky 2000-11-16 Replaced hard coded dimension size with
* H5S_MAX_RANK.
*
+ * Quincey Koziol, June 18, 2002
+ * Added 'extra_offset' and 'use_view' parameters. Also accommodate
+ * selection offset in MPI type built.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -143,6 +169,8 @@ H5S_mpio_hyper_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type )
{
struct dim { /* less hassle than malloc/free & ilk */
@@ -180,7 +208,7 @@ H5S_mpio_hyper_type( const H5S_t *space, const size_t elmt_size,
/* make a local copy of the dimension info so we can transform them */
assert(rank<=H5S_MAX_RANK); /* within array bounds */
for ( i=0; i<rank; ++i) {
- d[i].start = diminfo[i].start;
+ d[i].start = diminfo[i].start+space->select.offset[i];
d[i].strid = diminfo[i].stride;
d[i].block = diminfo[i].block;
d[i].count = diminfo[i].count;
@@ -423,6 +451,8 @@ H5S_mpio_hyper_type( const H5S_t *space, const size_t elmt_size,
/* fill in the remaining return values */
*count = 1; /* only have to move one of these suckers! */
+ *extra_offset = 0;
+ *use_view = 1;
*is_derived_type = 1;
HGOTO_DONE(SUCCEED);
@@ -430,6 +460,8 @@ empty:
/* special case: empty hyperslab */
*new_type = MPI_BYTE;
*count = 0;
+ *extra_offset = 0;
+ *use_view = 1; /* Note that this 'use_view' could go either way, but go with '1' for now */
*is_derived_type = 0;
done:
@@ -442,6 +474,87 @@ done:
/*-------------------------------------------------------------------------
+ * Function: H5S_mpio_hyper_contig_type
+ *
+ * Purpose: Translate a contiguous HDF5 "hyperslab" selection into an MPI type.
+ *
+ * Return: non-negative on success, negative on failure.
+ *
+ * Outputs: *new_type the MPI type corresponding to the selection
+ * *count how many objects of the new_type in selection
+ * (useful if this is the buffer type for xfer)
+ * *extra_offset Number of bytes of offset within dataset
+ * *use_view 0 if view not needed, 1 if needed
+ * *is_derived_type 0 if MPI primitive type, 1 if derived
+ *
+ * Programmer: Quincey Koziol, 2002/06/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5S_mpio_hyper_contig_type( const H5S_t *space, const size_t elmt_size,
+ /* out: */
+ MPI_Datatype *new_type,
+ size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
+ hbool_t *is_derived_type )
+{
+ hsize_t total_bytes; /* Number of bytes in selection */
+ hssize_t nelem; /* Number of elements in selection */
+ hsize_t byte_offset; /* Byte offset of contiguous region within selection */
+ hsize_t acc; /* Accumulator */
+ hsize_t slab[H5O_LAYOUT_NDIMS]; /* Hyperslab size */
+ hssize_t offset[H5O_LAYOUT_NDIMS]; /* Offset in selection */
+ int ndims; /* Number of dimensions of dataset */
+ int i; /* Local index */
+
+ FUNC_ENTER_NOINIT(H5S_mpio_hyper_contig_type);
+
+ /* Check args */
+ assert (space);
+
+ /* Get the number of elements in the selection */
+ nelem=H5S_get_select_npoints(space);
+
+ /* Compute the number of bytes in selection */
+ total_bytes = (hsize_t)elmt_size*nelem;
+
+ /* Set up convenient alias */
+ ndims=space->extent.u.simple.rank;
+
+ /* Initialize row sizes for each dimension */
+ for(i=(ndims-1),acc=1; i>=0; i--) {
+ slab[i]=acc*elmt_size;
+ acc*=space->extent.u.simple.size[i];
+ } /* end for */
+
+ /* Get in the selection offset */
+ assert(space->select.sel_info.hslab.diminfo);
+ for(i=0; i<ndims; i++)
+ offset[i] = space->select.sel_info.hslab.diminfo[i].start+space->select.offset[i];
+
+ /* Compute the initial buffer offset */
+ for(i=0,byte_offset=0; i<ndims; i++)
+ byte_offset+=offset[i]*slab[i];
+
+ /* fill in the return values */
+ *new_type = MPI_BYTE;
+ H5_ASSIGN_OVERFLOW(*count, total_bytes, hsize_t, size_t);
+ *extra_offset = byte_offset;
+ *use_view = 0;
+ *is_derived_type = 0;
+
+#ifdef H5Smpi_DEBUG
+ HDfprintf(stdout, "Leave %s total_bytes=%Hu\n", FUNC, total_bytes );
+#endif
+ FUNC_LEAVE (SUCCEED);
+} /* end H5S_mpio_hyper_contig_type() */
+
+
+/*-------------------------------------------------------------------------
* Function: H5S_mpio_space_type
*
* Purpose: Translate an HDF5 dataspace selection into an MPI type.
@@ -452,12 +565,17 @@ done:
* Outputs: *new_type the MPI type corresponding to the selection
* *count how many objects of the new_type in selection
* (useful if this is the buffer type for xfer)
+ * *extra_offset Number of bytes of offset within dataset
+ * *use_view 0 if view not needed, 1 if needed
* *is_derived_type 0 if MPI primitive type, 1 if derived
*
* Programmer: rky 980813
*
* Modifications:
*
+ * Quincey Koziol, June 18, 2002
+ * Added 'extra_offset' and 'use_view' parameters
+ *
*-------------------------------------------------------------------------
*/
static herr_t
@@ -465,6 +583,8 @@ H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
/* out: */
MPI_Datatype *new_type,
size_t *count,
+ hsize_t *extra_offset,
+ hbool_t *use_view,
hbool_t *is_derived_type )
{
int err;
@@ -487,7 +607,7 @@ H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
case H5S_SEL_NONE:
case H5S_SEL_ALL:
err = H5S_mpio_all_type( space, elmt_size,
- /* out: */ new_type, count, is_derived_type );
+ /* out: */ new_type, count, extra_offset, use_view, is_derived_type );
if (err<0)
HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
break;
@@ -498,9 +618,15 @@ H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
break;
case H5S_SEL_HYPERSLABS:
- err = H5S_mpio_hyper_type( space, elmt_size,
- /* out: */ new_type, count, is_derived_type );
- if (err)
+ if(H5S_select_contiguous(space)) {
+ err = H5S_mpio_hyper_contig_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, use_view, is_derived_type );
+ } /* end if */
+ else {
+ err = H5S_mpio_hyper_type( space, elmt_size,
+ /* out: */ new_type, count, extra_offset, use_view, is_derived_type );
+ } /* end else */
+ if (err<0)
HRETURN_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't convert \"all\" selection to MPI type");
break;
@@ -534,6 +660,17 @@ H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
*
* Programmer: rky 980813
*
+ * Notes:
+ * For collective data transfer only, since this routine eventually calls
+ * H5FD_mpio_setup to do the setup that eventually leads to MPI_File_set_view
+ * being called in H5FD_mpio_read or H5FD_mpio_write. MPI_File_set_view is a
+ * collective call. Letting independent data transfer use this route would
+ * result in hanging.
+ *
+ * The preconditions for calling this routine are located in the
+ * H5S_mpio_opt_possible() routine, which determines whether this routine
+ * can be called for a given dataset transfer.
+ *
* Modifications:
* rky 980918
* Added must_convert parameter to let caller know we can't optimize
@@ -550,27 +687,30 @@ H5S_mpio_space_type( const H5S_t *space, const size_t elmt_size,
* Removed 'disp' parameter from H5FD_mpio_setup routine and use the
* address of the dataset in MPI_File_set_view() calls, as necessary.
*
+ * QAK - 2002/06/18
+ * Removed 'dc_plist' parameter, since it was not used. Also, switch to
+ * getting the 'use_view' and 'extra_offset' settings for each selection.
+ *
*-------------------------------------------------------------------------
*/
static herr_t
-H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
- H5P_genplist_t UNUSED *dc_plist, size_t elmt_size,
+H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout, size_t elmt_size,
const H5S_t *file_space, const H5S_t *mem_space,
- hid_t dxpl_id, void *buf /*out*/,
+ hid_t dxpl_id, void *_buf /*out*/,
const hbool_t do_write )
{
- herr_t ret_value = SUCCEED;
- int err;
- haddr_t addr;
- size_t mpi_count;
- size_t mpi_buf_count, mpi_unused_count;
- MPI_Datatype mpi_buf_type, mpi_file_type;
- hbool_t mbt_is_derived=0,
- mft_is_derived=0;
+ haddr_t addr; /* Address of dataset (or selection) within file */
+ size_t mpi_buf_count, mpi_file_count; /* Number of "objects" to transfer */
+ hsize_t mpi_buf_offset, mpi_file_offset; /* Offset within dataset where selection (ie. MPI type) begins */
+ MPI_Datatype mpi_buf_type, mpi_file_type; /* MPI types for buffer (memory) and file */
+ hbool_t mbt_use_view=0, /* Whether we need to use a view for the buffer (memory) type */
+ mft_use_view=0; /* Whether we need to use a view for the file type */
+ hbool_t mbt_is_derived=0, /* Whether the buffer (memory) type is derived and needs to be free'd */
+ mft_is_derived=0; /* Whether the file type is derived and needs to be free'd */
hbool_t plist_is_setup=0; /* Whether the dxpl has been customized */
-#if 0
- H5P_genplist_t *plist; /* Property list pointer */
-#endif /* 0 */
+ uint8_t *buf=(uint8_t *)_buf; /* Alias for pointer arithmetic */
+ int err; /* Error detection value */
+ herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOINIT(H5S_mpio_spaces_xfer);
@@ -585,42 +725,13 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
- /*
- * For collective data transfer only since this would eventually
- * call H5FD_mpio_setup to do setup to eveually call MPI_File_set_view
- * in H5FD_mpio_read or H5FD_mpio_write. MPI_File_set_view is a
- * collective call. Letting independent data transfer use this
- * route would result in hanging.
- */
-#if 0
- /* For now, the checking is being done in
- * H5D_write and H5D_read before it is called because
- * the following block of code, though with the right idea, is not
- * correct yet.
- */
- {
- /* Get the transfer mode */
- H5FD_mpio_dxpl_t *dx;
- hid_t driver_id; /* VFL driver ID */
-
- if(NULL == (plist = H5I_object(dxpl_id)))
- HRETURN_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
-
- /* Get the driver ID */
- if(H5P_get(plist, H5D_XFER_VFL_ID_NAME, &driver_id)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver ID");
-
- /* Get the driver information */
- if(H5P_get(plist, H5D_XFER_VFL_INFO_NAME, &dx)<0)
- HGOTO_ERROR (H5E_PLIST, H5E_CANTGET, FAIL, "Can't retrieve VFL driver info");
- }
-#endif
-
/* create the MPI buffer type */
err = H5S_mpio_space_type( mem_space, elmt_size,
/* out: */
&mpi_buf_type,
&mpi_buf_count,
+ &mpi_buf_offset,
+ &mbt_use_view,
&mbt_is_derived );
if (MPI_SUCCESS != err)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI buf type");
@@ -629,7 +740,9 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
err = H5S_mpio_space_type( file_space, elmt_size,
/* out: */
&mpi_file_type,
- &mpi_unused_count,
+ &mpi_file_count,
+ &mpi_file_offset,
+ &mft_use_view,
&mft_is_derived );
if (MPI_SUCCESS != err)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADTYPE, FAIL,"couldn't create MPI file type");
@@ -638,7 +751,7 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
* the address to read from. This should be used as the displacement for
* a call to MPI_File_set_view() in the read or write call.
*/
- addr = f->shared->base_addr + layout->addr;
+ addr = f->shared->base_addr + layout->addr + mpi_file_offset;
#ifdef H5Smpi_DEBUG
HDfprintf(stdout, "spaces_xfer: addr=%a\n", addr );
#endif
@@ -647,23 +760,22 @@ H5S_mpio_spaces_xfer(H5F_t *f, const H5O_layout_t *layout,
* Pass buf type, file type to the file driver. Request an MPI type
* transfer (instead of an elementary byteblock transfer).
*/
- if(H5FD_mpio_setup(dxpl_id, mpi_buf_type, mpi_file_type)<0)
+ if(H5FD_mpio_setup(dxpl_id, mpi_buf_type, mpi_file_type, (unsigned)(mbt_use_view || mft_use_view))<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI-I/O properties");
plist_is_setup=1;
+ /* Adjust the buffer pointer to the beginning of the selection */
+ buf+=mpi_buf_offset;
+
/* transfer the data */
- H5_CHECK_OVERFLOW(mpi_buf_count, hsize_t, size_t);
- mpi_count = (size_t)mpi_buf_count;
if (do_write) {
- err = H5FD_write(f->shared->lf, H5FD_MEM_DRAW, dxpl_id, addr, mpi_count, buf);
- if (err) {
+ err = H5FD_write(f->shared->lf, H5FD_MEM_DRAW, dxpl_id, addr, mpi_buf_count, buf);
+ if (err<0)
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL,"MPI write failed");
- }
} else {
- err = H5FD_read (f->shared->lf, H5FD_MEM_DRAW, dxpl_id, addr, mpi_count, buf);
- if (err) {
+ err = H5FD_read (f->shared->lf, H5FD_MEM_DRAW, dxpl_id, addr, mpi_buf_count, buf);
+ if (err<0)
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL,"MPI read failed");
- }
}
done:
@@ -676,17 +788,13 @@ done:
/* free the MPI buf and file types */
if (mbt_is_derived) {
err = MPI_Type_free( &mpi_buf_type );
- if (MPI_SUCCESS != err) {
- HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
- "unable to free MPI file type");
- }
+ if (MPI_SUCCESS != err)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL, "unable to free MPI file type");
}
if (mft_is_derived) {
err = MPI_Type_free( &mpi_file_type );
- if (MPI_SUCCESS != err) {
- HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL,
- "unable to free MPI file type");
- }
+ if (MPI_SUCCESS != err)
+ HRETURN_ERROR(H5E_DATASPACE, H5E_MPI, FAIL, "unable to free MPI file type");
}
FUNC_LEAVE (ret_value);
@@ -715,15 +823,15 @@ done:
*/
herr_t
H5S_mpio_spaces_read(H5F_t *f, const H5O_layout_t *layout,
- H5P_genplist_t *dc_plist, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- hid_t dxpl_id, void *buf/*out*/)
+ H5P_genplist_t UNUSED *dc_plist, size_t elmt_size,
+ const H5S_t *file_space, const H5S_t *mem_space,
+ hid_t dxpl_id, void *buf/*out*/)
{
herr_t ret_value = FAIL;
FUNC_ENTER_NOAPI(H5S_mpio_spaces_read, FAIL);
- ret_value = H5S_mpio_spaces_xfer(f, layout, dc_plist, elmt_size,
+ ret_value = H5S_mpio_spaces_xfer(f, layout, elmt_size,
file_space, mem_space, dxpl_id,
buf, 0/*read*/);
@@ -753,15 +861,16 @@ H5S_mpio_spaces_read(H5F_t *f, const H5O_layout_t *layout,
*/
herr_t
H5S_mpio_spaces_write(H5F_t *f, const H5O_layout_t *layout,
- H5P_genplist_t *dc_plist, size_t elmt_size,
- const H5S_t *file_space, const H5S_t *mem_space,
- hid_t dxpl_id, const void *buf)
+ H5P_genplist_t UNUSED *dc_plist, size_t elmt_size,
+ const H5S_t *file_space, const H5S_t *mem_space,
+ hid_t dxpl_id, const void *buf)
{
herr_t ret_value = FAIL;
FUNC_ENTER_NOAPI(H5S_mpio_spaces_write, FAIL);
- ret_value = H5S_mpio_spaces_xfer(f, layout, dc_plist, elmt_size,
+ /*OKAY: CAST DISCARDS CONST QUALIFIER*/
+ ret_value = H5S_mpio_spaces_xfer(f, layout, elmt_size,
file_space, mem_space, dxpl_id,
(void*)buf, 1/*write*/);