summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Quincey Koziol <koziol@hdfgroup.org>, 2002-06-07 03:34:21 (GMT)
committer: Quincey Koziol <koziol@hdfgroup.org>, 2002-06-07 03:34:21 (GMT)
commit: 35c3c893e84d4a210b4c993ee92b582d07e63109 (patch)
tree: 2381a6c92d88376f1f4877e67ca95964022e8b96
parent: 01f102c2d49d6f25a6bbdaeb874e51e764d7df72 (diff)
download: hdf5-35c3c893e84d4a210b4c993ee92b582d07e63109.zip
hdf5-35c3c893e84d4a210b4c993ee92b582d07e63109.tar.gz
hdf5-35c3c893e84d4a210b4c993ee92b582d07e63109.tar.bz2
[svn-r5550] Purpose: Code Improvement
Description: Split the metadata accumulator code into two parts: one for allowing writes of the accumulator buffer during reads (when the buffer is dirty and needs to be flushed to disk in order to hold the new metadata being read in) and another for only allowing writes of the buffer during writes. This allows the MPI-I/O VFL driver to use the metadata accumulator (but only during writes) and benefit from the reduced number of metadata I/O operations that it brings.
Platforms tested: IRIX64 6.5 (modi4) w/parallel
-rw-r--r-- src/H5FD.c 83
-rw-r--r-- src/H5FDmpio.c 9
-rw-r--r-- src/H5FDpublic.h 14
3 files changed, 67 insertions, 39 deletions
diff --git a/src/H5FD.c b/src/H5FD.c
index 129b133..55661aa 100644
--- a/src/H5FD.c
+++ b/src/H5FD.c
@@ -2300,51 +2300,62 @@ H5FD_read(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, size_t siz
} /* end if */
/* Current read doesn't overlap with metadata accumulator, read it into accumulator */
else {
- /* Flush current contents, if dirty */
- if(file->accum_dirty) {
- if ((file->cls->write)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
- HRETURN_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write request failed");
-
- /* Reset accumulator dirty flag */
- file->accum_dirty=FALSE;
- } /* end if */
-
- /* Cache the new piece of metadata */
- /* Check if we need to resize the buffer */
- if(size>file->accum_buf_size) {
- /* Grow the metadata accumulator buffer */
- if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,size))==NULL)
- HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
+ /* Only update the metadata accumulator if it is not dirty or if
+ * we are allowed to write the accumulator out during reads (when
+ * it is dirty)
+ */
+ if(file->feature_flags&H5FD_FEAT_ACCUMULATE_METADATA_READ || !file->accum_dirty) {
+ /* Flush current contents, if dirty */
+ if(file->accum_dirty) {
+ if ((file->cls->write)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write request failed");
- /* Note the new buffer size */
- file->accum_buf_size=size;
- } /* end if */
- else {
- /* Check if we should shrink the accumulator buffer */
- if(size<(file->accum_buf_size/H5FD_ACCUM_THROTTLE) &&
- file->accum_buf_size>H5FD_ACCUM_THRESHOLD) {
- size_t new_size=(file->accum_buf_size/H5FD_ACCUM_THROTTLE); /* New size of accumulator buffer */
+ /* Reset accumulator dirty flag */
+ file->accum_dirty=FALSE;
+ } /* end if */
- /* Shrink the accumulator buffer */
- if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,new_size))==NULL)
+ /* Cache the new piece of metadata */
+ /* Check if we need to resize the buffer */
+ if(size>file->accum_buf_size) {
+ /* Grow the metadata accumulator buffer */
+ if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,size))==NULL)
HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
/* Note the new buffer size */
- file->accum_buf_size=new_size;
+ file->accum_buf_size=size;
} /* end if */
- } /* end else */
+ else {
+ /* Check if we should shrink the accumulator buffer */
+ if(size<(file->accum_buf_size/H5FD_ACCUM_THROTTLE) &&
+ file->accum_buf_size>H5FD_ACCUM_THRESHOLD) {
+ size_t new_size=(file->accum_buf_size/H5FD_ACCUM_THROTTLE); /* New size of accumulator buffer */
- /* Update accumulator information */
- file->accum_loc=addr;
- file->accum_size=size;
- file->accum_dirty=FALSE;
+ /* Shrink the accumulator buffer */
+ if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,new_size))==NULL)
+ HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
+
+ /* Note the new buffer size */
+ file->accum_buf_size=new_size;
+ } /* end if */
+ } /* end else */
+
+ /* Update accumulator information */
+ file->accum_loc=addr;
+ file->accum_size=size;
+ file->accum_dirty=FALSE;
- /* Read into accumulator */
- if ((file->cls->read)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
- HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
+ /* Read into accumulator */
+ if ((file->cls->read)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
- /* Copy into buffer */
- HDmemcpy(buf,file->meta_accum,size);
+ /* Copy into buffer */
+ HDmemcpy(buf,file->meta_accum,size);
+ } /* end if */
+ else {
+ /* Dispatch to driver */
+ if ((file->cls->read)(file, type, dxpl_id, addr, size, buf)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
+ } /* end else */
} /* end else */
} /* end if */
else {
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 0ec8db3..2d81469 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -944,6 +944,15 @@ H5FD_mpio_query(const H5FD_t *_file, unsigned long *flags /* out */)
if(flags) {
*flags=0;
*flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
+
+ /* Distinguish between updating the metadata accumulator on writes and
+ * reads. This is particularly (perhaps only, even) important for MPI-I/O
+ * where we guarantee that writes are collective, but reads may not be.
+ * If we were to allow the metadata accumulator to be written during a
+ * read operation, the application would hang.
+ */
+ *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
+
*flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
} /* end if */
diff --git a/src/H5FDpublic.h b/src/H5FDpublic.h
index 49c89bc..52a8dec 100644
--- a/src/H5FDpublic.h
+++ b/src/H5FDpublic.h
@@ -90,8 +90,16 @@ typedef enum H5FD_mem_t {
* the library will attempt to cache metadata as it is written to the file
* and build up a larger block of metadata to eventually pass to the VFL
* 'write' routine.
+ *
+ * Distinguish between updating the metadata accumulator on writes and
+ * reads. This is particularly (perhaps only, even) important for MPI-I/O
+ * where we guarantee that writes are collective, but reads may not be.
+ * If we were to allow the metadata accumulator to be written during a
+ * read operation, the application would hang.
*/
-#define H5FD_FEAT_ACCUMULATE_METADATA 0x00000002
+#define H5FD_FEAT_ACCUMULATE_METADATA_WRITE 0x00000002
+#define H5FD_FEAT_ACCUMULATE_METADATA_READ 0x00000004
+#define H5FD_FEAT_ACCUMULATE_METADATA (H5FD_FEAT_ACCUMULATE_METADATA_WRITE|H5FD_FEAT_ACCUMULATE_METADATA_READ)
/*
* Defining the H5FD_FEAT_DATA_SIEVE for a VFL driver means that
* the library will attempt to cache raw data as it is read from/written to
@@ -99,13 +107,13 @@ typedef enum H5FD_mem_t {
* http://www.mcs.anl.gov/~thakur/papers/romio-coll.ps.gz
* http://www.mcs.anl.gov/~thakur/papers/mpio-high-perf.ps.gz
*/
-#define H5FD_FEAT_DATA_SIEVE 0x00000004
+#define H5FD_FEAT_DATA_SIEVE 0x00000008
/*
* Defining the H5FD_FEAT_AGGREGATE_SMALLDATA for a VFL driver means that
* the library will attempt to allocate a larger block for "small" raw data
* and then sub-allocate "small" raw data requests from that larger block.
*/
-#define H5FD_FEAT_AGGREGATE_SMALLDATA 0x00000008
+#define H5FD_FEAT_AGGREGATE_SMALLDATA 0x00000010
/* Forward declaration */