summaryrefslogtreecommitdiffstats
path: root/src/H5FDsubfiling.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/H5FDsubfiling.c')
-rw-r--r--src/H5FDsubfiling.c176
1 files changed, 118 insertions, 58 deletions
diff --git a/src/H5FDsubfiling.c b/src/H5FDsubfiling.c
index 27ed44a..b19df57 100644
--- a/src/H5FDsubfiling.c
+++ b/src/H5FDsubfiling.c
@@ -900,14 +900,16 @@ H5FD__subfiling_close(H5FD_t *_file)
{
H5FD_subfiling_t *file_ptr = (H5FD_subfiling_t *)_file;
herr_t ret_value = SUCCEED; /* Return value */
- // subfiling_context_t *sf_context = NULL;
+ subfiling_context_t *sf_context = NULL;
FUNC_ENTER_NOAPI_NOINIT
/* Sanity check */
HDassert(file_ptr);
-#ifdef VERBOSE
+
sf_context = (subfiling_context_t *)get__subfiling_object(file_ptr->fa.common.context_id);
+
+#ifdef VERBOSE
if (sf_context->topology->rank_is_ioc)
printf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid);
else
@@ -918,6 +920,20 @@ H5FD__subfiling_close(H5FD_t *_file)
HSYS_GOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close file")
}
+ if (sf_context != NULL) {
+ if (sf_context->subfile_prefix) {
+ HDfree(sf_context->subfile_prefix);
+ sf_context->subfile_prefix = NULL;
+ }
+ if (sf_context->sf_filename) {
+ HDfree(sf_context->sf_filename);
+ sf_context->sf_filename = NULL;
+ }
+ if (sf_context->h5_filename) {
+ HDfree(sf_context->h5_filename);
+ sf_context->h5_filename = NULL;
+ }
+ }
/* if set, close the copy of the plist for the underlying VFD. */
if ((H5I_INVALID_HID != file_ptr->fa.common.ioc_fapl_id) &&
(H5I_dec_ref(file_ptr->fa.common.ioc_fapl_id) < 0))
@@ -1099,13 +1115,65 @@ H5FD__subfiling_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t a
/*-------------------------------------------------------------------------
* Function: H5FD_subfiling_get_eof
*
- * Purpose: Returns the end-of-file marker, which is the greater of
- * either the filesystem end-of-file or the HDF5 end-of-address
- * markers.
+ * Purpose: Returns the end-of-file marker from the filesystem
+ * perspective.
*
* Return: End of file address, the first address past the end of the
* "file", either the filesystem file or the HDF5 file.
*
+ * SUBFILING NOTE:
+ * The EOF calculation for subfiling is somewhat different
+ * than for the more traditional HDF5 file implementations.
+ * This statement derives from the fact that unlike "normal"
+ * HDF5 files, subfiling introduces a multi-file representation
+ * of a single HDF5 file. The plurality of sub-files represents
+ * a software RAID-0 based HDF5 file. As such, each sub-file
+ * contains a designated portion of the address space of the
+ * virtual HDF5 storage. We have no notion of HDF5 datatypes,
+ * datasets, metadata, or other HDF5 structures; only BYTES.
+ *
+ * The organization of the bytes within sub-files is consistent
+ * with the RAID-0 striping, i.e. there are IO Concentrators
+ * (IOCs) which correspond to a stripe-count (in Lustre) as
+ * well as a stripe_size. The combination of these two
+ * variables determines the "address" (a combination of IOC
+ * and a file offset) of any storage operation.
+ *
+ * Having a defined storage layout, the virtual file EOF
+ * calculation should be the MAXIMUM value returned by the
+ * collection of IOCs. Every MPI rank which hosts an IOC
+ * maintains its own EOF by updating that value for each
+ * WRITE operation that completes, i.e. if a new local EOF
+ * is greater than the existing local EOF, the new EOF
+ * will replace the old. The local EOF calculation is as
+ * follows.
+ * 1. At file creation, each IOC is assigned a rank value
+ * (0 to N-1, where N is the total number of IOCs) and
+ * a 'sf_base_addr' (= 'subfile_rank' * 'sf_stripe_size').
+ * We also determine the 'sf_blocksize_per_stripe', which
+ * is simply the 'sf_stripe_size' * 'n_ioc_concentrators'
+ *
+ * 2. For every write operation, the IOC receives a message
+ * containing a file_offset and the data_size.
+ * 3. The file_offset + data_size are in turn used to
+ * create a stripe_id:
+ * IOC-(ioc_rank) IOC-(ioc_rank+1)
+ * |<- sf_base_address |<- sf_base_address |
+ * ID +--------------------+--------------------+
+ * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * ~ ~ ~
+ * N:|<- sf_stripe_size ->|<- sf_stripe_size ->|
+ * +--------------------+--------------------+
+ *
+ * The new 'stripe_id' is then used to calculate a
+ * potential new EOF:
+ * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr
+ * + ((file_offset + data_size) % sf_stripe_size)
+ *
+ * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof.
+ *
+ *
* Programmer: Richard Warren
*
*-------------------------------------------------------------------------
@@ -1208,11 +1276,12 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATT
#endif
if (ioc_total > 1) {
+ size_t max_depth;
blocksize = sf_context->sf_blocksize_per_stripe;
#if 0 /* JRM */
size_t max_depth = (size_t)(size / blocksize) + 2;
#else /* JRM */
- size_t max_depth = (size / (size_t)blocksize) + 2;
+ max_depth = (size / (size_t)blocksize) + 2;
#endif /* JRM */
int next, ioc_count = 0, ioc_start = -1;
@@ -1447,11 +1516,12 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_AT
#endif
if (ioc_total > 1) {
+ size_t max_depth;
blocksize = sf_context->sf_blocksize_per_stripe;
#if 0 /* JRM */
- size_t max_depth = (size_t)(size / blocksize) + 2;
+ size_t max_depth = (size_t)(size / blocksize) + 2;
#else /* JRM */
- size_t max_depth = (size_t)(size / (size_t)blocksize) + 2;
+ max_depth = (size_t)(size / (size_t)blocksize) + 2;
#endif /* JRM */
int next, ioc_count = 0, ioc_start = -1;
@@ -1507,13 +1577,13 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_AT
size, /* (in) IO size */
1); /* (in) data extent of the 'type' assumes byte */
#else /* JRM */
- count = init__indep_io(sf_context, /* We use the context to look up config info */
+ count = init__indep_io(sf_context, /* We use the context to look up config info */
max_depth, ioc_total, (int64_t *)source_data_offset, /* (out) Memory offset */
(int64_t *)sf_data_size, /* (out) Length of this contiguous block */
(int64_t *)sf_offset, /* (out) File offset */
- &ioc_start, /* (out) IOC index corresponding to starting offset */
- &ioc_count, /* (out) number of actual IOCs used */
- offset, /* (in) Starting file offset */
+ &ioc_start, /* (out) IOC index corresponding to starting offset */
+ &ioc_count, /* (out) number of actual IOCs used */
+ offset, /* (in) Starting file offset */
(int64_t)size, /* (in) IO size */
1); /* (in) data extent of the 'type' assumes byte */
#endif /* JRM */
@@ -1840,7 +1910,7 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
H5FD_subfiling_t *file = (H5FD_subfiling_t *)_file;
herr_t ret_value = SUCCEED; /* Return value */
- FUNC_ENTER_NOAPI_NOINIT
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
HDassert(file);
@@ -1875,22 +1945,18 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5
static herr_t
H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw)
{
- H5FD_subfiling_t *file_ptr = (H5FD_subfiling_t *)_file; /* VFD file struct */
+ H5FD_subfiling_t *file = (H5FD_subfiling_t *)_file; /* VFD file struct */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT
- HDassert(file_ptr);
-
- /* Set exclusive or shared lock based on rw status */
- if (file_ptr->fa.require_ioc) {
+ HDassert(file);
+ if (file->fa.require_ioc)
puts("Subfiling driver doesn't suport file locking");
- }
else {
- if (H5FD_lock(file_ptr->sf_file, rw) < 0)
+ if (H5FD_lock(file->sf_file, rw) < 0)
HSYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, FAIL, "unable to lock file")
} /* end if */
-
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5FD_subfiling_lock() */
@@ -1916,14 +1982,8 @@ H5FD__subfiling_unlock(H5FD_t *_file)
HDassert(file);
- if (HDflock(file->fd, LOCK_UN) < 0) {
- if (ENOSYS == errno)
- HSYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, FAIL,
- "file locking disabled on this file system (use "
- "HDF5_USE_FILE_LOCKING environment variable to override)")
- else
- HSYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, FAIL, "unable to unlock file")
- } /* end if */
+ if (H5FD_unlock(file->sf_file) < 0)
+ HSYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, FAIL, "unable to lock file")
done:
FUNC_LEAVE_NOAPI(ret_value)
@@ -2069,7 +2129,7 @@ create__simple_vector(hid_t H5_ATTR_UNUSED file_space_id, void *memDataBuf, hadd
bufs[0] = nextBuf;
offsets[0] = addrBase;
- blocklens[0] = (hssize_t)((hssize_t)elements * type_extent);
+ blocklens[0] = (hssize_t)((hssize_t)elements * (hssize_t)type_extent);
if (*vlen < 0) {
*_offsets = offsets;
@@ -2246,9 +2306,10 @@ get__base_offset(int mpi_rank, int mpi_size, size_t dtype_extent, hid_t mem_spac
}
herr_t
-H5FD__dataset_write_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t dtype_extent, int mpi_rank,
- int mpi_size, void *_dset, hid_t mem_type_id, hid_t mem_space_id,
- hid_t file_space_id, hid_t plist_id, const void *buf)
+H5FD__dataset_write_contiguous(hid_t H5_ATTR_UNUSED h5_file_id, haddr_t dataset_baseAddr, size_t dtype_extent,
+ int mpi_rank, int mpi_size, void H5_ATTR_UNUSED *_dset,
+ hid_t H5_ATTR_UNUSED mem_type_id, hid_t mem_space_id, hid_t file_space_id,
+ hid_t H5_ATTR_UNUSED plist_id, const void *buf)
{
herr_t ret_value = SUCCEED; /* Return value */
hssize_t num_elem_file = (hssize_t)-1, num_elem_mem = (hssize_t)-1;
@@ -2262,20 +2323,21 @@ H5FD__dataset_write_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_
FUNC_ENTER_PACKAGE
if ((num_elem_file = H5Sget_select_npoints(file_space_id)) < 0)
- puts("can't get number of points in file selection");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't get number of points in file selection")
+
if ((num_elem_mem = H5Sget_select_npoints(mem_space_id)) < 0)
- puts("can't get number of points in memory selection");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't get number of points in memory selection")
if (num_elem_file != num_elem_mem)
- puts("number of elements selected in file and memory dataspaces is "
- "different");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
+ "number of elements selected"
+ " in file and memory dataspaces is different")
- if (H5S_get_validated_dataspace(mem_space_id, &mem_space) < 0) {
- puts("could not get a validated dataspace from mem_space_id");
- }
- if (H5S_get_validated_dataspace(file_space_id, &file_space) < 0) {
- puts("could not get a validated dataspace from file_space_id");
- }
+ if (H5S_get_validated_dataspace(mem_space_id, &mem_space) < 0)
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "could not get a validated dataspace from mem_space_id")
+
+ if (H5S_get_validated_dataspace(file_space_id, &file_space) < 0)
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "could not get a validated dataspace from file_space_id")
if (num_elem_file > 0) {
sel_type = H5Sget_select_type(file_space_id);
@@ -2357,25 +2419,27 @@ done:
}
herr_t
-H5FD__dataset_read_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t dtype_extent, int mpi_rank,
- int mpi_size, void *_dset, hid_t mem_type_id, hid_t mem_space_id,
- hid_t file_space_id, hid_t plist_id, void *buf)
+H5FD__dataset_read_contiguous(hid_t H5_ATTR_UNUSED h5_file_id, haddr_t dataset_baseAddr, size_t dtype_extent,
+ int mpi_rank, int mpi_size, void H5_ATTR_UNUSED *_dset,
+ hid_t H5_ATTR_UNUSED mem_type_id, hid_t mem_space_id, hid_t file_space_id,
+ hid_t H5_ATTR_UNUSED plist_id, void *buf)
{
- H5FD_t * dset = (H5FD_t *)_dset;
herr_t ret_value = SUCCEED; /* Return value */
hssize_t num_elem_file = -1, num_elem_mem = -1;
H5S_sel_type sel_type;
hssize_t sf_vlen = -1;
+ int status = 0;
FUNC_ENTER_PACKAGE
if ((num_elem_file = H5Sget_select_npoints(file_space_id)) < 0)
- puts("can't get number of points in file selection");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't get number of points in file selection")
if ((num_elem_mem = H5Sget_select_npoints(mem_space_id)) < 0)
- puts("can't get number of points in memory selection");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't get number of points in memory selection")
if (num_elem_file != num_elem_mem)
- puts("number of elements selected in file and memory dataspaces is "
- "different");
+ HGOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL,
+ "number of elements selected"
+ " in file and memory dataspaces is different")
if (num_elem_file > 0) {
sel_type = H5Sget_select_type(file_space_id);
@@ -2384,7 +2448,6 @@ H5FD__dataset_read_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t
// printf("[%d] H5S_SEL_NONE\n", mpi_rank);
break;
case H5S_SEL_POINTS: {
- int status;
haddr_t rank_baseAddr;
rank_baseAddr =
get__base_offset(mpi_rank, mpi_size, dtype_extent, mem_space_id, file_space_id);
@@ -2397,7 +2460,6 @@ H5FD__dataset_read_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t
break;
}
case H5S_SEL_HYPERSLABS: {
- int status;
haddr_t rank_baseAddr;
const H5S_t *mem_space;
const H5S_t *file_space;
@@ -2417,11 +2479,10 @@ H5FD__dataset_read_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t
goto done;
}
if (status > 0) {
- hssize_t previous_vlen = sf_vlen;
if (sf_offsets == NULL)
sf_offsets = (haddr_t *)malloc(sizeof(haddr_t));
if (sf_sizes == NULL)
- sf_sizes = (hsize_t *)malloc(sizeof(hsize_t));
+ sf_sizes = (hssize_t *)malloc(sizeof(hsize_t));
if (sf_bufs == NULL)
sf_bufs = (void **)malloc(sizeof(void *));
sf_vlen = 1;
@@ -2430,13 +2491,12 @@ H5FD__dataset_read_contiguous(hid_t h5_file_id, haddr_t dataset_baseAddr, size_t
assert(sf_bufs);
sf_offsets[0] = rank_baseAddr;
- sf_sizes[0] = num_elem_mem * dtype_extent;
+ sf_sizes[0] = (hssize_t)((hssize_t)num_elem_mem * (hssize_t)dtype_extent);
sf_bufs[0] = buf;
}
break;
}
case H5S_SEL_ALL: {
- int status;
haddr_t rank_baseAddr;
rank_baseAddr =
get__base_offset(mpi_rank, mpi_size, dtype_extent, mem_space_id, file_space_id);
@@ -2558,7 +2618,7 @@ done:
#endif /* JRM */
void
-manage_client_logfile(int client_rank, int flag_value)
+manage_client_logfile(int H5_ATTR_UNUSED client_rank, int H5_ATTR_UNUSED flag_value)
{
#ifndef NDEBUG
if (flag_value) {