diff options
author | Quincey Koziol <koziol@hdfgroup.org> | 2002-06-27 13:11:31 (GMT) |
---|---|---|
committer | Quincey Koziol <koziol@hdfgroup.org> | 2002-06-27 13:11:31 (GMT) |
commit | 925d61b7754e1127783b1dce79f32c77a8826005 (patch) | |
tree | 52bbdadd7c65438d9b91c108f24de3263b31aa49 /perform/pio_engine.c | |
parent | 218c0a70e75e102170fbc202a45f79b91d76d7da (diff) | |
download | hdf5-925d61b7754e1127783b1dce79f32c77a8826005.zip hdf5-925d61b7754e1127783b1dce79f32c77a8826005.tar.gz hdf5-925d61b7754e1127783b1dce79f32c77a8826005.tar.bz2 |
[svn-r5718] Purpose:
New Feature
Description:
Added interleaved block pattern to MPI and HDF5 access methods. Also,
changed the blocks to just use the same size as the regular blocks for I/O,
instead of a separate block size.
Platforms tested:
IRIX64 6.5 (modi4) w/parallel
Diffstat (limited to 'perform/pio_engine.c')
-rw-r--r-- | perform/pio_engine.c | 301 |
1 files changed, 128 insertions, 173 deletions
diff --git a/perform/pio_engine.c b/perform/pio_engine.c index 0a59158..ca3ceeb 100644 --- a/perform/pio_engine.c +++ b/perform/pio_engine.c @@ -1,8 +1,18 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Copyright by the Board of Trustees of the University of Illinois. * + * All rights reserved. * + * * + * This file is part of HDF5. The full HDF5 copyright notice, including * + * terms governing use, modification, and redistribution, is contained in * + * the files COPYING and Copyright.html. COPYING can be found at the root * + * of the source code distribution tree; Copyright.html can be found at the * + * root level of an installed copy of the electronic HDF5 document set and * + * is linked from the top-level documents page. It can also be found at * + * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html. If you do not have * + * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + /* - * Copyright (C) 2001, 2002 - * National Center for Supercomputing Applications - * All rights reserved. - * * Author: Albert Cheng of NCSA, Oct 24, 2001. */ @@ -111,9 +121,9 @@ typedef union _file_descr { static char *pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t size); static herr_t do_write(results *res, file_descr *fd, parameters *parms, - long ndsets, off_t nelmts, size_t blk_size, size_t buf_size, void *buffer); + long ndsets, off_t nelmts, size_t buf_size, void *buffer); static herr_t do_read(results *res, file_descr *fd, parameters *parms, - long ndsets, off_t nelmts, size_t blk_size, size_t buf_size, void *buffer /*out*/); + long ndsets, off_t nelmts, size_t buf_size, void *buffer /*out*/); static herr_t do_fopen(parameters *param, char *fname, file_descr *fd /*out*/, int flags); static herr_t do_fclose(iotype iot, file_descr *fd); @@ -152,7 +162,6 @@ do_pio(parameters param) off_t nelmts; char *buffer = NULL; /*data buffer pointer */ size_t buf_size; /*data buffer size in bytes */ - size_t blk_size; /*interleaved I/O block size */ /* HDF5 variables */ herr_t hrc; /*HDF5 return code */ @@ -186,7 +195,6 @@ do_pio(parameters param) nelmts = param.num_elmts; /* number of elements per dataset */ maxprocs = param.num_procs; /* max number of mpi-processes to use */ buf_size = param.buf_size; - blk_size = param.block_size; /* interleaved IO block size */ if (nfiles < 0 ) { fprintf(stderr, @@ -224,16 +232,6 @@ do_pio(parameters param) } } - /* Should only need blk_size <= buf_size. */ - /* More restrictive condition for easier implementation for now. */ - if (blk_size > 0 && (buf_size % blk_size)){ - HDfprintf(stderr, - "Transfer buffer size (%Hd) must be a multiple of the " - "interleaved I/O block size (%Hd)\n", - (long_long)buf_size, (long_long)blk_size); - GOTOERROR(FAIL); - } - if (pio_debug_level >= 4) { int myrank; @@ -262,7 +260,7 @@ do_pio(parameters param) VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, START); - hrc = do_write(&res, &fd, ¶m, ndsets, nelmts, blk_size, buf_size, buffer); + hrc = do_write(&res, &fd, ¶m, ndsets, nelmts, buf_size, buffer); set_time(res.timers, HDF5_FINE_WRITE_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_write failed"); @@ -286,7 +284,7 @@ do_pio(parameters param) VRFY((hrc == SUCCESS), "do_fopen failed"); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, START); - hrc = do_read(&res, &fd, ¶m, ndsets, nelmts, blk_size, buf_size, buffer); + hrc = do_read(&res, &fd, ¶m, ndsets, nelmts, buf_size, buffer); set_time(res.timers, HDF5_FINE_READ_FIXED_DIMS, STOP); VRFY((hrc == SUCCESS), "do_read failed"); @@ -449,7 +447,7 @@ pio_create_filename(iotype iot, const char *base_name, char *fullname, size_t si */ static herr_t do_write(results *res, file_descr *fd, parameters *parms, long ndsets, - off_t nelmts, size_t blk_size, size_t buf_size, void *buffer) + off_t nelmts, size_t buf_size, void *buffer) { int ret_code = SUCCESS; int rc; /*routine return code */ @@ -464,7 +462,6 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, off_t file_offset; /*file offset of the next transfer */ off_t dset_size; /*one dataset size in bytes */ size_t nelmts_in_buf; /*how many element the buffer holds */ - size_t nelmts_in_blk=0; /*how many element a block holds */ off_t elmts_begin; /*first elmt this process transfer */ off_t elmts_count; /*number of elmts this process transfer */ hid_t dcpl = -1; /* Dataset creation property list */ @@ -589,7 +586,7 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, * transferred by this process. It may be different for different * transfer pattern due to rounding to integral values. */ - if (blk_size==0){ + if (parms->interleaved==0) { /* Contiguous Pattern: * Calculate the beginning element of this process and the next. * elmts_count is the difference between these two beginnings. @@ -605,11 +602,12 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, else /* last process. Take whatever are left */ elmts_count = nelmts - elmts_begin; - }else{ + } /* end if */ + else { /* Interleaved Pattern: - * Each process takes blk_size of elements, starting with the first + * Each process takes buf_size of elements, starting with the first * process. So, the last process may have fewer or even none. - * Calculate the beginning element of this process and the next. + * Calculate the beginning element of this process. * The elmnts_begin here marks only the beginning of the first * block accessed by this process. */ @@ -621,22 +619,21 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, */ off_t remain_nelmts, remain_begin; /* unallocated remaining*/ - nelmts_in_blk = blk_size/ELMT_SIZE; - elmts_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); + elmts_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g); /* must use integer calculation next */ /* allocate equal blocks per process */ - elmts_count = (nelmts / (off_t)(nelmts_in_blk*pio_mpi_nprocs_g)) * - (off_t)nelmts_in_blk; - remain_nelmts = nelmts % ((off_t)(nelmts_in_blk*pio_mpi_nprocs_g)); + elmts_count = (nelmts / (off_t)(nelmts_in_buf*pio_mpi_nprocs_g)) * + (off_t)nelmts_in_buf; + remain_nelmts = nelmts % (off_t)(nelmts_in_buf*pio_mpi_nprocs_g); /* allocate any remaining */ - remain_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); + remain_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g); if (remain_nelmts > remain_begin){ /* it gets something */ - if (remain_nelmts > (remain_begin+(off_t)nelmts_in_blk)){ + if (remain_nelmts > (remain_begin+(off_t)nelmts_in_buf)){ /* one full block */ - elmts_count += nelmts_in_blk; + elmts_count += nelmts_in_buf; }else{ /* only a partial block */ elmts_count += remain_nelmts - remain_begin; @@ -647,15 +644,15 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, if (pio_debug_level >= 4) { HDprint_rank(output); HDfprintf(output, "Debug(do_write): " - "nelmts_in_blk=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", - (long_long)nelmts_in_blk, (long_long)elmts_begin, + "nelmts_in_buf=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", + (long_long)nelmts_in_buf, (long_long)elmts_begin, (long_long)elmts_count); } /* The task is to transfer elmts_count elements, starting at * elmts_begin position, using transfer buffer of buf_size bytes. - * If blk_size > 0, select blk_size at a time, in round robin + * If interleaved, select buf_size at a time, in round robin * fashion, according to number of process. Otherwise, select * all elmt_count in contiguous. */ @@ -688,77 +685,57 @@ do_write(results *res, file_descr *fd, parameters *parms, long ndsets, /* Calculate offset of write within a dataset/file */ switch (parms->io_type) { case POSIXIO: - if (blk_size==0){ + if (parms->interleaved==0) { /* Contiguous pattern */ /* need to (off_t) the elmnts_begin expression because they */ /* may be of smaller sized integer types */ file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == - POSIXWRITE(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXWRITE"); - }else{ - /* interleaved access pattern */ - char *buf_p=buffer; - size_t xferred=0; - size_t toxfer=0; - - file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { -HDprint_rank(output); -HDfprintf(output, -"Debug(do_write): " -"nelmts_toxfer=%Hd, nelmts_xfer=%Hd\n" -, -(long_long)nelmts_toxfer, (long_long)nelmts_xfer); -} - while (xferred < nelmts_toxfer){ - if ((nelmts_toxfer - xferred) >= nelmts_in_blk) - toxfer = nelmts_in_blk; - else - toxfer = nelmts_toxfer - xferred; - /* Skip offset over blocks of other processes */ - file_offset = dset_offset + - (off_t)(elmts_begin + (nelmts_xfer+xferred)*pio_mpi_nprocs_g)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { -HDprint_rank(output); -HDfprintf(output, -"Debug(do_write): " -"nelmts_toxfer=%Hd, nelmts_xfer=%Hd" -", toxfer=%Hd, xferred=%Hd" -", file_offset=%Hd" -"\n", -(long_long)nelmts_toxfer, (long_long)nelmts_xfer, -(long_long)toxfer, (long_long)xferred, -(long_long)file_offset); -} - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are written */ - rc = ((ssize_t)(toxfer*ELMT_SIZE) == - POSIXWRITE(fd->posixfd, buf_p, toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXWRITE"); - xferred += toxfer; - } - } + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + file_offset = dset_offset + + (off_t)(elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + } /* end else */ + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + /* check if all bytes are transferred */ + rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == + POSIXWRITE(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); + VRFY((rc != 0), "POSIXWRITE"); break; case MPIO: - mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; + if (parms->interleaved==0){ + /* Contiguous pattern */ + mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + } /* end else */ + mrc = MPI_File_write_at(fd->mpifd, mpi_offset, buffer, - (int)(nelmts_toxfer), ELMT_MPI_TYPE, + (int)nelmts_toxfer, ELMT_MPI_TYPE, &mpi_status); VRFY((mrc==MPI_SUCCESS), "MPIO_WRITE"); break; + case PHDF5: /*set up the dset space id to select the segment to process */ { - h5mem_start[0] = elmts_begin + nelmts_xfer; + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5mem_start[0] = elmts_begin + nelmts_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5mem_start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); + } /* end else */ h5mem_stride[0] = h5mem_block[0] = nelmts_toxfer; h5mem_count[0] = 1; hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, @@ -834,7 +811,7 @@ done: */ static herr_t do_read(results *res, file_descr *fd, parameters *parms, long ndsets, - off_t nelmts, size_t blk_size, size_t buf_size, void *buffer /*out*/) + off_t nelmts, size_t buf_size, void *buffer /*out*/) { int ret_code = SUCCESS; int rc; /*routine return code */ @@ -849,7 +826,6 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, off_t file_offset; /*file offset of the next transfer */ off_t dset_size; /*one dataset size in bytes */ size_t nelmts_in_buf; /*how many element the buffer holds */ - size_t nelmts_in_blk=0; /*how many element a block holds */ off_t elmts_begin; /*first elmt this process transfer */ off_t elmts_count; /*number of elmts this process transfer */ @@ -935,7 +911,7 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, * transferred by this process. It may be different for different * transfer pattern due to rounding to integral values. */ - if (blk_size==0){ + if (parms->interleaved==0){ /* Contiguous Pattern: * Calculate the beginning element of this process and the next. * elmts_count is the difference between these two beginnings. @@ -951,11 +927,12 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, else /* last process. Take whatever are left */ elmts_count = nelmts - elmts_begin; - }else{ + } /* end if */ + else { /* Interleaved Pattern: - * Each process takes blk_size of elements, starting with the first + * Each process takes buf_size of elements, starting with the first * process. So, the last process may have fewer or even none. - * Calculate the beginning element of this process and the next. + * Calculate the beginning element of this process. * The elmnts_begin here marks only the beginning of the first * block accessed by this process. */ @@ -967,41 +944,41 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, */ off_t remain_nelmts, remain_begin; /* unallocated remaining*/ - nelmts_in_blk = blk_size/ELMT_SIZE; - elmts_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); + elmts_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g); /* must use integer calculation next */ /* allocate equal blocks per process */ - elmts_count = (nelmts / (off_t)(nelmts_in_blk*pio_mpi_nprocs_g)) * - (off_t)nelmts_in_blk; - remain_nelmts = nelmts % ((off_t)(nelmts_in_blk*pio_mpi_nprocs_g)); + elmts_count = (nelmts / (off_t)(nelmts_in_buf*pio_mpi_nprocs_g)) * + (off_t)nelmts_in_buf; + remain_nelmts = nelmts % ((off_t)(nelmts_in_buf*pio_mpi_nprocs_g)); /* allocate any remaining */ - remain_begin = (off_t)(nelmts_in_blk*pio_mpi_rank_g); - if (remain_nelmts > remain_begin){ + remain_begin = (off_t)(nelmts_in_buf*pio_mpi_rank_g); + if (remain_nelmts > remain_begin) { /* it gets something */ - if (remain_nelmts > (remain_begin+(off_t)nelmts_in_blk)){ + if (remain_nelmts > (remain_begin+(off_t)nelmts_in_buf)) { /* one full block */ - elmts_count += nelmts_in_blk; - }else{ + elmts_count += nelmts_in_buf; + } /* end if */ + else { /* only a partial block */ elmts_count += remain_nelmts - remain_begin; - } - } - } + } /* end else */ + } /* end if */ + } /* end else */ /* debug */ if (pio_debug_level >= 4) { HDprint_rank(output); HDfprintf(output, "Debug(do_read): " - "nelmts_in_blk=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", - (long_long)nelmts_in_blk, (long_long)elmts_begin, + "nelmts_in_buf=%Hd, elmts_begin=%Hd, elmts_count=%Hd\n", + (long_long)nelmts_in_buf, (long_long)elmts_begin, (long_long)elmts_count); } /* The task is to transfer elmts_count elements, starting at * elmts_begin position, using transfer buffer of buf_size bytes. - * If blk_size > 0, select blk_size at a time, in round robin + * If interleaved, select buf_size at a time, in round robin * fashion, according to number of process. Otherwise, select * all elmt_count in contiguous. */ @@ -1025,71 +1002,41 @@ do_read(results *res, file_descr *fd, parameters *parms, long ndsets, /* Calculate offset of read within a dataset/file */ switch (parms->io_type){ case POSIXIO: - if (blk_size==0){ + if (parms->interleaved==0){ /* Contiguous pattern */ /* need to (off_t) the elmnts_begin expression because they */ /* may be of smaller sized integer types */ file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == - POSIXREAD(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXREAD"); - }else{ - /* interleaved access pattern */ - char *buf_p=buffer; - size_t xferred=0; - size_t toxfer=0; - - file_offset = dset_offset + (off_t)(elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { -HDprint_rank(output); -HDfprintf(output, -"Debug(do_read): " -"nelmts_toxfer=%Hd, nelmts_xfer=%Hd\n" -, -(long_long)nelmts_toxfer, (long_long)nelmts_xfer); -} - while (xferred < nelmts_toxfer){ - if ((nelmts_toxfer - xferred) >= nelmts_in_blk) - toxfer = nelmts_in_blk; - else - toxfer = nelmts_toxfer - xferred; - /* Skip offset over blocks of other processes */ - file_offset = dset_offset + - (off_t)(elmts_begin + (nelmts_xfer+xferred)*pio_mpi_nprocs_g)*(off_t)ELMT_SIZE; - if (pio_debug_level >= 4) { -HDprint_rank(output); -HDfprintf(output, -"Debug(do_read):" -"nelmts_toxfer=%Hd, nelmts_xfer=%Hd" -", toxfer=%Hd, xferred=%Hd" -", file_offset=%Hd" -"\n", -(long_long)nelmts_toxfer, (long_long)nelmts_xfer, -(long_long)toxfer, (long_long)xferred, -(long_long)file_offset); -} - /* only care if seek returns error */ - rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; - VRFY((rc==0), "POSIXSEEK"); - /* check if all bytes are transferred */ - rc = ((ssize_t)(toxfer*ELMT_SIZE) == - POSIXREAD(fd->posixfd, buf_p, toxfer*ELMT_SIZE)); - VRFY((rc != 0), "POSIXREAD"); - xferred += toxfer; - } - } + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + file_offset = dset_offset + + (off_t)(elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + } /* end else */ + + /* only care if seek returns error */ + rc = POSIXSEEK(fd->posixfd, file_offset) < 0 ? -1 : 0; + VRFY((rc==0), "POSIXSEEK"); + /* check if all bytes are transferred */ + rc = ((ssize_t)(nelmts_toxfer*ELMT_SIZE) == + POSIXREAD(fd->posixfd, buffer, nelmts_toxfer*ELMT_SIZE)); + VRFY((rc != 0), "POSIXREAD"); break; case MPIO: - mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; + if (parms->interleaved==0){ + /* Contiguous pattern */ + mpi_offset = dset_offset + (elmts_begin + nelmts_xfer)*(off_t)ELMT_SIZE; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + mpi_offset = dset_offset + (elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g))*(off_t)ELMT_SIZE; + } /* end else */ mrc = MPI_File_read_at(fd->mpifd, mpi_offset, buffer, - (int)(nelmts_toxfer), ELMT_MPI_TYPE, + (int)nelmts_toxfer, ELMT_MPI_TYPE, &mpi_status); VRFY((mrc==MPI_SUCCESS), "MPIO_read"); break; @@ -1097,7 +1044,15 @@ HDfprintf(output, case PHDF5: /*set up the dset space id to select the segment to process */ { - h5mem_start[0] = elmts_begin + nelmts_xfer; + if (parms->interleaved==0){ + /* Contiguous pattern */ + h5mem_start[0] = elmts_begin + nelmts_xfer; + } /* end if */ + else { + /* Interleaved access pattern */ + /* Skip offset over blocks of other processes */ + h5mem_start[0] = elmts_begin + (nelmts_xfer*pio_mpi_nprocs_g); + } /* end else */ h5mem_stride[0] = h5mem_block[0] = nelmts_toxfer; h5mem_count[0] = 1; hrc = H5Sselect_hyperslab(h5dset_space_id, H5S_SELECT_SET, |