summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Quincey Koziol <koziol@hdfgroup.org> 2002-06-07 03:32:02 (GMT)
committer: Quincey Koziol <koziol@hdfgroup.org> 2002-06-07 03:32:02 (GMT)
commit: b6fbe63a4803a77591cff5e0be03fe7e6e705a47 (patch)
tree: 7750e759971708ad8e049c564f8e6e1600e01ca5 /src
parent: e1f36155744674c71975bf253d44ffdd618b2808 (diff)
download: hdf5-b6fbe63a4803a77591cff5e0be03fe7e6e705a47.zip
hdf5-b6fbe63a4803a77591cff5e0be03fe7e6e705a47.tar.gz
hdf5-b6fbe63a4803a77591cff5e0be03fe7e6e705a47.tar.bz2
[svn-r5549] Purpose:
    Code Improvement
Description:
    Split the metadata accumulator code into two parts: one for allowing writes of the accumulator buffer during reads (when the buffer is dirty and needs to be flushed to disk in order to hold the new metadata being read in) and another for only allowing writes of the buffer during writes. This allows the MPI-I/O VFL driver to use the metadata accumulator (but only during writes) and benefit from the reduced number of metadata I/O operations that it brings.
Platforms tested:
    IRIX64 6.5 (modi4) w/parallel
Diffstat (limited to 'src')
-rw-r--r-- src/H5FD.c 85
-rw-r--r-- src/H5FDmpio.c 9
-rw-r--r-- src/H5FDpublic.h 14
3 files changed, 68 insertions, 40 deletions
diff --git a/src/H5FD.c b/src/H5FD.c
index f77cbda..23563f8 100644
--- a/src/H5FD.c
+++ b/src/H5FD.c
@@ -2274,52 +2274,63 @@ H5FD_read(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si
} /* end if */
/* Current read doesn't overlap with metadata accumulator, read it into accumulator */
else {
- /* Flush current contents, if dirty */
- if(file->accum_dirty) {
- if ((file->cls->write)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
- HRETURN_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write request failed");
-
- /* Reset accumulator dirty flag */
- file->accum_dirty=FALSE;
- } /* end if */
-
- /* Cache the new piece of metadata */
- /* Check if we need to resize the buffer */
- if(size>file->accum_buf_size) {
- /* Grow the metadata accumulator buffer */
- if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,size))==NULL)
- HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
+ /* Only update the metadata accumulator if it is not dirty or if
+ * we are allowed to write the accumulator out during reads (when
+ * it is dirty)
+ */
+ if(file->feature_flags&H5FD_FEAT_ACCUMULATE_METADATA_READ || !file->accum_dirty) {
+ /* Flush current contents, if dirty */
+ if(file->accum_dirty) {
+ if ((file->cls->write)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "driver write request failed");
- /* Note the new buffer size */
- file->accum_buf_size=size;
- } /* end if */
- else {
- /* Check if we should shrink the accumulator buffer */
- if(size<(file->accum_buf_size/H5FD_ACCUM_THROTTLE) &&
- file->accum_buf_size>H5FD_ACCUM_THRESHOLD) {
- hsize_t new_size=(file->accum_buf_size/H5FD_ACCUM_THROTTLE); /* New size of accumulator buffer */
+ /* Reset accumulator dirty flag */
+ file->accum_dirty=FALSE;
+ } /* end if */
- /* Shrink the accumulator buffer */
- if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,new_size))==NULL)
+ /* Cache the new piece of metadata */
+ /* Check if we need to resize the buffer */
+ if(size>file->accum_buf_size) {
+ /* Grow the metadata accumulator buffer */
+ if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,size))==NULL)
HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
/* Note the new buffer size */
- file->accum_buf_size=new_size;
+ file->accum_buf_size=size;
} /* end if */
- } /* end else */
+ else {
+ /* Check if we should shrink the accumulator buffer */
+ if(size<(file->accum_buf_size/H5FD_ACCUM_THROTTLE) &&
+ file->accum_buf_size>H5FD_ACCUM_THRESHOLD) {
+ hsize_t new_size=(file->accum_buf_size/H5FD_ACCUM_THROTTLE); /* New size of accumulator buffer */
- /* Update accumulator information */
- file->accum_loc=addr;
- file->accum_size=size;
- file->accum_dirty=FALSE;
+ /* Shrink the accumulator buffer */
+ if ((file->meta_accum=H5FL_BLK_REALLOC(meta_accum,file->meta_accum,new_size))==NULL)
+ HRETURN_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate metadata accumulator buffer");
- /* Read into accumulator */
- if ((file->cls->read)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
- HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
+ /* Note the new buffer size */
+ file->accum_buf_size=new_size;
+ } /* end if */
+ } /* end else */
- /* Copy into buffer */
- assert(size==(hsize_t)((size_t)size)); /*check for overflow*/
- HDmemcpy(buf,file->meta_accum,(size_t)size);
+ /* Update accumulator information */
+ file->accum_loc=addr;
+ file->accum_size=size;
+ file->accum_dirty=FALSE;
+
+ /* Read into accumulator */
+ if ((file->cls->read)(file, H5FD_MEM_DEFAULT, dxpl_id, file->accum_loc, file->accum_size, file->meta_accum)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
+
+ /* Copy into buffer */
+ assert(size==(hsize_t)((size_t)size)); /*check for overflow*/
+ HDmemcpy(buf,file->meta_accum,(size_t)size);
+ } /* end if */
+ else {
+ /* Dispatch to driver */
+ if ((file->cls->read)(file, type, dxpl_id, addr, size, buf)<0)
+ HRETURN_ERROR(H5E_VFL, H5E_READERROR, FAIL, "driver read request failed");
+ } /* end else */
} /* end else */
} /* end if */
else {
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 6b4a20c..7992b12 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -952,6 +952,15 @@ H5FD_mpio_query(const H5FD_t *_file, unsigned long *flags /* out */)
if(flags) {
*flags=0;
*flags|=H5FD_FEAT_AGGREGATE_METADATA; /* OK to aggregate metadata allocations */
+
+ /* Distinguish between updating the metadata accumulator on writes and
+ * reads. This is particularly (perhaps only, even) important for MPI-I/O
+ * where we guarantee that writes are collective, but reads may not be.
+ * If we were to allow the metadata accumulator to be written during a
+ * read operation, the application would hang.
+ */
+ *flags|=H5FD_FEAT_ACCUMULATE_METADATA_WRITE; /* OK to accumulate metadata for faster writes */
+
*flags|=H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
} /* end if */
diff --git a/src/H5FDpublic.h b/src/H5FDpublic.h
index 802b1c9..0a8ea5d 100644
--- a/src/H5FDpublic.h
+++ b/src/H5FDpublic.h
@@ -89,8 +89,16 @@ typedef enum H5FD_mem_t {
* the library will attempt to cache metadata as it is written to the file
* and build up a larger block of metadata to eventually pass to the VFL
* 'write' routine.
+ *
+ * Distinguish between updating the metadata accumulator on writes and
+ * reads. This is particularly (perhaps only, even) important for MPI-I/O
+ * where we guarantee that writes are collective, but reads may not be.
+ * If we were to allow the metadata accumulator to be written during a
+ * read operation, the application would hang.
*/
-#define H5FD_FEAT_ACCUMULATE_METADATA 0x00000002
+#define H5FD_FEAT_ACCUMULATE_METADATA_WRITE 0x00000002
+#define H5FD_FEAT_ACCUMULATE_METADATA_READ 0x00000004
+#define H5FD_FEAT_ACCUMULATE_METADATA (H5FD_FEAT_ACCUMULATE_METADATA_WRITE|H5FD_FEAT_ACCUMULATE_METADATA_READ)
/*
* Defining the H5FD_FEAT_DATA_SIEVE for a VFL driver means that
* the library will attempt to cache raw data as it is read from/written to
@@ -98,13 +106,13 @@ typedef enum H5FD_mem_t {
* http://www.mcs.anl.gov/~thakur/papers/romio-coll.ps.gz
* http://www.mcs.anl.gov/~thakur/papers/mpio-high-perf.ps.gz
*/
-#define H5FD_FEAT_DATA_SIEVE 0x00000004
+#define H5FD_FEAT_DATA_SIEVE 0x00000008
/*
* Defining the H5FD_FEAT_AGGREGATE_SMALLDATA for a VFL driver means that
* the library will attempt to allocate a larger block for "small" raw data
* and then sub-allocate "small" raw data requests from that larger block.
*/
-#define H5FD_FEAT_AGGREGATE_SMALLDATA 0x00000008
+#define H5FD_FEAT_AGGREGATE_SMALLDATA 0x00000010
/* Forward declaration */