diff options
author | jhendersonHDF <jhenderson@hdfgroup.org> | 2022-08-09 23:05:37 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-08-09 23:05:37 (GMT) |
commit | ef33ac8bac5fd201b41d1a3084f03834f47729a2 (patch) | |
tree | ad4756b872abff6d16f11d9a6c6c949e8f359cad /src | |
parent | b84241e57a97309b15846da4cc74611a66d92f6d (diff) | |
download | hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.zip hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.gz hdf5-ef33ac8bac5fd201b41d1a3084f03834f47729a2.tar.bz2 |
Subfiling VFD - tidying up and fixing a few new testing failures (#1977)
* Rename Subfiling IOC "thread_pool_count" field to "thread_pool_size"
* Add simple HDF5 example for Subfiling VFD
* Subfiling VFD - never cache app topology as it may change
* Subfiling VFD - cleanup unused funtionality and tidy up some TODOs
* Subfiling VFD - tidy up subfiling error handling in H5subfiling_common.c
* Subfiling VFD - show number of failed I/O requests on close
* Subfiling VFD - Update file cmp callback after switching to MPI I/O VFD
* Amend RELEASE.txt with info about h5fuse.sh and Subfiling limitations
* Subfiling VFD - switch to using H5_basename and H5_dirname
Diffstat (limited to 'src')
-rw-r--r-- | src/H5FDsubfiling/H5FDioc.c | 100 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDioc.h | 10 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDioc_threads.c | 73 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfile_int.c | 53 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5FDsubfiling.c | 277 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.c | 1463 | ||||
-rw-r--r-- | src/H5FDsubfiling/H5subfiling_common.h | 27 |
7 files changed, 522 insertions, 1481 deletions
diff --git a/src/H5FDsubfiling/H5FDioc.c b/src/H5FDsubfiling/H5FDioc.c index 6bfb1b7..5030055 100644 --- a/src/H5FDsubfiling/H5FDioc.c +++ b/src/H5FDsubfiling/H5FDioc.c @@ -61,36 +61,6 @@ typedef struct H5FD_ioc_t { char *file_dir; /* Directory where we find files */ char *file_path; /* The user defined filename */ - -#ifndef H5_HAVE_WIN32_API - /* On most systems the combination of device and i-node number uniquely - * identify a file. Note that Cygwin, MinGW and other Windows POSIX - * environments have the stat function (which fakes inodes) - * and will use the 'device + inodes' scheme as opposed to the - * Windows code further below. - */ - dev_t device; /* file device number */ -#else - /* Files in windows are uniquely identified by the volume serial - * number and the file index (both low and high parts). - * - * There are caveats where these numbers can change, especially - * on FAT file systems. On NTFS, however, a file should keep - * those numbers the same until renamed or deleted (though you - * can use ReplaceFile() on NTFS to keep the numbers the same - * while renaming). - * - * See the MSDN "BY_HANDLE_FILE_INFORMATION Structure" entry for - * more information. - * - * http://msdn.microsoft.com/en-us/library/aa363788(v=VS.85).aspx - */ - DWORD nFileIndexLow; - DWORD nFileIndexHigh; - DWORD dwVolumeSerialNumber; - - HANDLE hFile; /* Native windows file handle */ -#endif /* H5_HAVE_WIN32_API */ } H5FD_ioc_t; /* @@ -490,7 +460,7 @@ H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out) H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI I/O VFD on IOC under FAPL"); /* Specific to this I/O Concentrator */ - config_out->thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; + config_out->thread_pool_size = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; done: if (H5_mpi_comm_free(&comm) < 0) @@ -536,7 +506,12 @@ H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa) if (fa->magic != H5FD_IOC_FAPL_MAGIC) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid H5FD_ioc_config_t magic value"); - /* TODO: add extra IOC configuration validation code */ + if (fa->under_fapl_id < 0) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid under FAPL ID"); + + if (fa->subf_config.ioc_selection < SELECT_IOC_ONE_PER_NODE || + fa->subf_config.ioc_selection >= ioc_selection_options) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC selection method"); done: H5_SUBFILING_FUNC_LEAVE; @@ -850,24 +825,15 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) } if (NULL != (file_ptr->file_path = HDrealpath(name, NULL))) { - char *path = NULL; - char *directory = dirname(path); - - if (NULL == (path = HDstrdup(file_ptr->file_path))) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy subfiling subfile path"); - if (NULL == (file_ptr->file_dir = HDstrdup(directory))) { - HDfree(path); - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, - "can't copy subfiling subfile directory path"); + if (H5_dirname(file_ptr->file_path, &file_ptr->file_dir) < 0) { + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't get subfile dirname"); } - - HDfree(path); } else { if (ENOENT == errno) { if (NULL == (file_ptr->file_path = HDstrdup(name))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy file name"); - if (NULL == (file_ptr->file_dir = HDstrdup("."))) + if (NULL == (file_ptr->file_dir = H5MM_strdup("."))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "can't set subfile directory path"); } else @@ -983,7 +949,7 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr) #ifdef H5FD_IOC_DEBUG { - subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->fa.context_id); + subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id); if (sf_context) { if (sf_context->topology->rank_is_ioc) HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid); @@ -1035,7 +1001,7 @@ done: HDfree(file_ptr->file_path); file_ptr->file_path = NULL; - HDfree(file_ptr->file_dir); + H5MM_free(file_ptr->file_dir); file_ptr->file_dir = NULL; /* Release the file info */ @@ -1089,8 +1055,31 @@ H5FD__ioc_cmp(const H5FD_t *_f1, const H5FD_t *_f2) HDassert(f1); HDassert(f2); - ret_value = H5FD_cmp(f1->ioc_file, f2->ioc_file); + if (f1->ioc_file && f1->ioc_file->cls && f1->ioc_file->cls->cmp && f2->ioc_file && f2->ioc_file->cls && + f2->ioc_file->cls->cmp) { + ret_value = H5FD_cmp(f1->ioc_file, f2->ioc_file); + } + else { + h5_stat_t st1; + h5_stat_t st2; + + /* + * If under VFD has no compare routine, get + * inode of HDF5 stub file and compare them + * + * Note that the compare callback doesn't + * allow for failure, so we just return -1 + * if stat fails. + */ + if (HDstat(f1->file_path, &st1) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, -1, "couldn't stat file"); + if (HDstat(f2->file_path, &st2) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, -1, "couldn't stat file"); + + ret_value = (st1.st_ino > st2.st_ino) - (st1.st_ino < st2.st_ino); + } +done: H5_SUBFILING_FUNC_LEAVE; } /* end H5FD__ioc_cmp */ @@ -1607,8 +1596,6 @@ H5FD__ioc_del(const char *name, hid_t fapl) MPI_Comm comm = MPI_COMM_NULL; MPI_Info info = MPI_INFO_NULL; FILE *config_file = NULL; - char *name_copy = NULL; - char *name_copy2 = NULL; char *tmp_filename = NULL; char *base_filename = NULL; char *file_dirname = NULL; @@ -1647,13 +1634,10 @@ H5FD__ioc_del(const char *name, hid_t fapl) if (HDstat(name, &st) < 0) H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SYSERRSTR, FAIL, "HDstat failed"); - if (NULL == (name_copy = HDstrdup(name))) - H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename"); - if (NULL == (name_copy2 = HDstrdup(name))) - H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename"); - - base_filename = basename(name_copy); - file_dirname = dirname(name_copy2); + if (H5_basename(name, &base_filename) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get file basename"); + if (H5_dirname(name, &file_dirname) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get file dirname"); /* Try to open the subfiling configuration file and get the number of IOCs */ if (NULL == (tmp_filename = HDmalloc(PATH_MAX))) @@ -1732,8 +1716,8 @@ done: H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI info object"); HDfree(tmp_filename); - HDfree(name_copy); - HDfree(name_copy2); + H5MM_free(file_dirname); + H5MM_free(base_filename); H5_SUBFILING_FUNC_LEAVE; } diff --git a/src/H5FDsubfiling/H5FDioc.h b/src/H5FDsubfiling/H5FDioc.h index 48102ac..7173aa9 100644 --- a/src/H5FDsubfiling/H5FDioc.h +++ b/src/H5FDsubfiling/H5FDioc.h @@ -108,7 +108,7 @@ * for compatibility with legacy HDF5 applications. The default driver used * is currently the #H5FD_MPIO driver. * - * \var int32_t H5FD_ioc_config_t::thread_pool_count + * \var int32_t H5FD_ioc_config_t::thread_pool_size * The number of I/O concentrator worker threads to use. * * This value can also be set or adjusted with the #H5FD_IOC_THREAD_POOL_SIZE @@ -121,10 +121,10 @@ * */ typedef struct H5FD_ioc_config_t { - uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */ - uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */ - hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */ - int32_t thread_pool_count; /* Number of I/O concentrator worker threads to use */ + uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */ + uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */ + hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */ + int32_t thread_pool_size; /* Number of I/O concentrator worker threads to use */ H5FD_subfiling_shared_config_t subf_config; /* Subfiling driver configuration */ } H5FD_ioc_config_t; //! <!-- [H5FD_ioc_config_t_snip] --> diff --git a/src/H5FDsubfiling/H5FDioc_threads.c b/src/H5FDsubfiling/H5FDioc_threads.c index 4c1887f..813fb3f 100644 --- a/src/H5FDsubfiling/H5FDioc_threads.c +++ b/src/H5FDsubfiling/H5FDioc_threads.c @@ -112,9 +112,9 @@ static void ioc_io_queue_add_entry(ioc_data_t *ioc_data, sf_work_request_t *wk_r int initialize_ioc_threads(void *_sf_context) { - subfiling_context_t *sf_context = _sf_context; - ioc_data_t *ioc_data = NULL; - unsigned thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; + subfiling_context_t *sf_context = _sf_context; + ioc_data_t *ioc_data = NULL; + unsigned thread_pool_size = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE; char *env_value; int ret_value = 0; #ifdef H5FD_IOC_COLLECT_STATS @@ -173,12 +173,12 @@ initialize_ioc_threads(void *_sf_context) if ((env_value = HDgetenv(H5FD_IOC_THREAD_POOL_SIZE)) != NULL) { int value_check = HDatoi(env_value); if (value_check > 0) { - thread_pool_count = (unsigned int)value_check; + thread_pool_size = (unsigned int)value_check; } } /* Initialize a thread pool for the I/O concentrator's worker threads */ - if (hg_thread_pool_init(thread_pool_count, &ioc_data->io_thread_pool) < 0) + if (hg_thread_pool_init(thread_pool_size, &ioc_data->io_thread_pool) < 0) H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, (-1), "can't initialize IOC worker thread pool"); /* Create the main IOC thread that will receive and dispatch I/O requests */ @@ -194,11 +194,9 @@ initialize_ioc_threads(void *_sf_context) t_end = MPI_Wtime(); #ifdef H5FD_IOC_DEBUG - if (sf_verbose_flag) { - if (sf_context->topology->subfile_rank == 0) { - HDprintf("%s: time = %lf seconds\n", __func__, (t_end - t_start)); - HDfflush(stdout); - } + if (sf_context->topology->subfile_rank == 0) { + HDprintf("%s: time = %lf seconds\n", __func__, (t_end - t_start)); + HDfflush(stdout); } #endif @@ -242,6 +240,10 @@ finalize_ioc_threads(void *_sf_context) hg_thread_join(ioc_data->ioc_main_thread); } + if (ioc_data->io_queue.num_failed > 0) + H5_SUBFILING_DONE_ERROR(H5E_IO, H5E_CLOSEERROR, -1, "%" PRId32 " I/O requests failed", + ioc_data->io_queue.num_failed); + HDfree(ioc_data); H5_SUBFILING_FUNC_LEAVE; @@ -418,7 +420,6 @@ ioc_main(ioc_data_t *ioc_data) wk_req.subfile_rank = subfile_rank; wk_req.context_id = ioc_data->sf_context_id; wk_req.start_time = queue_start_time; - wk_req.buffer = NULL; ioc_io_queue_add_entry(ioc_data, &wk_req); @@ -521,8 +522,6 @@ handle_work_request(void *arg) atomic_fetch_add(&ioc_data->sf_work_pending, 1); - msg->in_progress = 1; - switch (msg->tag) { case WRITE_INDEP: op_ret = ioc_file_queue_write_indep(msg, msg->subfile_rank, msg->source, sf_context->sf_data_comm, @@ -744,15 +743,10 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source, t_start = MPI_Wtime(); t_queue_delay = t_start - msg->start_time; -#ifdef H5FD_IOC_DEBUG - if (sf_verbose_flag) { - if (sf_logfile) { - HDfprintf(sf_logfile, - "[ioc(%d) %s]: msg from %d: datasize=%ld\toffset=%ld, " - "queue_delay = %lf seconds\n", - subfile_rank, __func__, source, data_size, file_offset, t_queue_delay); - } - } +#ifdef H5_SUBFILING_DEBUG + H5_subfiling_log(file_context_id, + "[ioc(%d) %s]: msg from %d: datasize=%ld\toffset=%ld, queue_delay = %lf seconds\n", + subfile_rank, __func__, source, data_size, file_offset, t_queue_delay); #endif #endif @@ -799,20 +793,16 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source, t_start = t_end; -#ifdef H5FD_IOC_DEBUG - if (sf_verbose_flag) { - if (sf_logfile) { - HDfprintf(sf_logfile, "[ioc(%d) %s] MPI_Recv(%ld bytes, from = %d) status = %d\n", subfile_rank, - __func__, data_size, source, mpi_code); - } - } +#ifdef H5_SUBFILING_DEBUG + H5_subfiling_log(file_context_id, "[ioc(%d) %s] MPI_Recv(%ld bytes, from = %d) status = %d\n", + subfile_rank, __func__, data_size, source, mpi_code); #endif #endif sf_fid = sf_context->sf_fid; -#ifdef H5FD_IOC_DEBUG +#ifdef H5_SUBFILING_DEBUG if (sf_fid < 0) H5_subfiling_log(file_context_id, "%s: WARNING: attempt to write data to closed subfile FID %d", __func__, sf_fid); @@ -919,13 +909,10 @@ ioc_file_queue_read_indep(sf_work_request_t *msg, int subfile_rank, int source, t_start = MPI_Wtime(); t_queue_delay = t_start - msg->start_time; -#ifdef H5FD_IOC_DEBUG - if (sf_verbose_flag && (sf_logfile != NULL)) { - HDfprintf(sf_logfile, - "[ioc(%d) %s] msg from %d: datasize=%ld\toffset=%ld " - "queue_delay=%lf seconds\n", - subfile_rank, __func__, source, data_size, file_offset, t_queue_delay); - } +#ifdef H5_SUBFILING_DEBUG + H5_subfiling_log(file_context_id, + "[ioc(%d) %s] msg from %d: datasize=%ld\toffset=%ld queue_delay=%lf seconds\n", + subfile_rank, __func__, source, data_size, file_offset, t_queue_delay); #endif #endif @@ -959,10 +946,9 @@ ioc_file_queue_read_indep(sf_work_request_t *msg, int subfile_rank, int source, sf_pread_time += t_read; sf_queue_delay_time += t_queue_delay; -#ifdef H5FD_IOC_DEBUG - if (sf_verbose_flag && (sf_logfile != NULL)) { - HDfprintf(sf_logfile, "[ioc(%d)] MPI_Send to source(%d) completed\n", subfile_rank, source); - } +#ifdef H5_SUBFILING_DEBUG + H5_subfiling_log(sf_context->sf_context_id, "[ioc(%d)] MPI_Send to source(%d) completed\n", subfile_rank, + source); #endif #endif @@ -1598,7 +1584,7 @@ ioc_io_queue_complete_entry(ioc_data_t *ioc_data, ioc_io_queue_entry_t *entry_pt #ifdef H5FD_IOC_COLLECT_STATS /* Compute the queued and execution time */ queued_time = entry_ptr->dispatch_time - entry_ptr->q_time; - execution_time = H5_now_usec() = entry_ptr->dispatch_time; + execution_time = H5_now_usec() - entry_ptr->dispatch_time; ioc_data->io_queue.requests_completed++; @@ -1608,8 +1594,6 @@ ioc_io_queue_complete_entry(ioc_data_t *ioc_data, ioc_io_queue_entry_t *entry_pt hg_thread_mutex_unlock(&ioc_data->io_queue.q_mutex); - HDassert(entry_ptr->wk_req.buffer == NULL); - ioc_io_queue_free_entry(entry_ptr); entry_ptr = NULL; @@ -1642,7 +1626,6 @@ ioc_io_queue_free_entry(ioc_io_queue_entry_t *q_entry_ptr) HDassert(q_entry_ptr->magic == H5FD_IOC__IO_Q_ENTRY_MAGIC); HDassert(q_entry_ptr->next == NULL); HDassert(q_entry_ptr->prev == NULL); - HDassert(q_entry_ptr->wk_req.buffer == NULL); q_entry_ptr->magic = 0; diff --git a/src/H5FDsubfiling/H5FDsubfile_int.c b/src/H5FDsubfiling/H5FDsubfile_int.c index af14db3..22a5bd0 100644 --- a/src/H5FDsubfiling/H5FDsubfile_int.c +++ b/src/H5FDsubfiling/H5FDsubfile_int.c @@ -192,6 +192,59 @@ done: * invalid data if other ranks perform writes while this * operation is in progress. * + * SUBFILING NOTE: + * The EOF calculation for subfiling is somewhat different + * than for the more traditional HDF5 file implementations. + * This statement derives from the fact that unlike "normal" + * HDF5 files, subfiling introduces a multi-file representation + * of a single HDF5 file. The plurality of sub-files represents + * a software RAID-0 based HDF5 file. As such, each sub-file + * contains a designated portion of the address space of the + * virtual HDF5 storage. We have no notion of HDF5 datatypes, + * datasets, metadata, or other HDF5 structures; only BYTES. + * + * The organization of the bytes within sub-files is consistent + * with the RAID-0 striping, i.e. there are IO Concentrators + * (IOCs) which correspond to a stripe-count (in Lustre) as + * well as a stripe_size. The combination of these two + * variables determines the "address" (a combination of IOC + * and a file offset) of any storage operation. + * + * Having a defined storage layout, the virtual file EOF + * calculation should be the MAXIMUM value returned by the + * collection of IOCs. Every MPI rank which hosts an IOC + * maintains its own EOF by updating that value for each + * WRITE operation that completes, i.e. if a new local EOF + * is greater than the existing local EOF, the new EOF + * will replace the old. The local EOF calculation is as + * follows. + * 1. At file creation, each IOC is assigned a rank value + * (0 to N-1, where N is the total number of IOCs) and + * a 'sf_base_addr' = 'subfile_rank' * 'sf_stripe_size') + * we also determine the 'sf_blocksize_per_stripe' which + * is simply the 'sf_stripe_size' * 'n_ioc_concentrators' + * + * 2. For every write operation, the IOC receives a message + * containing a file_offset and the data_size. + * + * 3. The file_offset + data_size are in turn used to + * create a stripe_id: + * IOC-(ioc_rank) IOC-(ioc_rank+1) + * |<- sf_base_address |<- sf_base_address | + * ID +--------------------+--------------------+ + * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->| + * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->| + * ~ ~ ~ + * N:|<- sf_stripe_size ->|<- sf_stripe_size ->| + * +--------------------+--------------------+ + * + * The new 'stripe_id' is then used to calculate a + * potential new EOF: + * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr + * + ((file_offset + data_size) % sf_stripe_size) + * + * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof. + * * Return: SUCCEED/FAIL * * Programmer: JRM -- 1/18/22 diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c index 4cdded3..8fe8f77 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.c +++ b/src/H5FDsubfiling/H5FDsubfiling.c @@ -108,37 +108,6 @@ typedef struct H5FD_subfiling_t { char *file_dir; /* Directory where we find files */ char *file_path; /* The user defined filename */ -#ifndef H5_HAVE_WIN32_API - /* On most systems the combination of device and i-node number uniquely - * identify a file. Note that Cygwin, MinGW and other Windows POSIX - * environments have the stat function (which fakes inodes) - * and will use the 'device + inodes' scheme as opposed to the - * Windows code further below. - */ - dev_t device; /* file device number */ - ino_t inode; /* file i-node number */ -#else - /* Files in windows are uniquely identified by the volume serial - * number and the file index (both low and high parts). - * - * There are caveats where these numbers can change, especially - * on FAT file systems. On NTFS, however, a file should keep - * those numbers the same until renamed or deleted (though you - * can use ReplaceFile() on NTFS to keep the numbers the same - * while renaming). - * - * See the MSDN "BY_HANDLE_FILE_INFORMATION Structure" entry for - * more information. - * - * http://msdn.microsoft.com/en-us/library/aa363788(v=VS.85).aspx - */ - DWORD nFileIndexLow; - DWORD nFileIndexHigh; - DWORD dwVolumeSerialNumber; - - HANDLE hFile; /* Native windows file handle */ -#endif /* H5_HAVE_WIN32_API */ - /* * The element layouts above this point are identical with the * H5FD_ioc_t structure. As a result, @@ -175,18 +144,6 @@ typedef struct H5FD_subfiling_t { #define REGION_OVERFLOW(A, Z) \ (ADDR_OVERFLOW(A) || SIZE_OVERFLOW(Z) || HADDR_UNDEF == (A) + (Z) || (HDoff_t)((A) + (Z)) < (HDoff_t)(A)) -#define H5FD_SUBFILING_DEBUG_OP_CALLS 0 /* debugging print toggle; 0 disables */ - -#if H5FD_SUBFILING_DEBUG_OP_CALLS -#define H5FD_SUBFILING_LOG_CALL(name) \ - do { \ - HDprintf("called %s()\n", (name)); \ - HDfflush(stdout); \ - } while (0) -#else -#define H5FD_SUBFILING_LOG_CALL(name) /* no-op */ -#endif /* H5FD_SUBFILING_DEBUG_OP_CALLS */ - /* Prototypes */ static herr_t H5FD__subfiling_term(void); static void *H5FD__subfiling_fapl_get(H5FD_t *_file); @@ -393,18 +350,6 @@ H5FD__subfiling_term(void) herr_t ret_value = SUCCEED; if (H5FD_SUBFILING_g >= 0) { - /* Free the subfiling application layout information */ - if (sf_app_layout) { - HDfree(sf_app_layout->layout); - sf_app_layout->layout = NULL; - - HDfree(sf_app_layout->node_ranks); - sf_app_layout->node_ranks = NULL; - - HDfree(sf_app_layout); - sf_app_layout = NULL; - } - /* Unregister from HDF5 error API */ if (H5subfiling_err_class_g >= 0) { if (H5Eunregister_class(H5subfiling_err_class_g) < 0) @@ -646,12 +591,21 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa) HDassert(fa != NULL); if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION) - H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version"); + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "unknown H5FD_subfiling_config_t version"); if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC) H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid H5FD_subfiling_config_t magic value"); - /* TODO: add extra subfiling configuration validation code */ + if (fa->ioc_fapl_id < 0) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC FAPL ID"); + + if (!fa->require_ioc) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Subfiling VFD currently always requires IOC VFD to be used"); + + if (fa->shared_cfg.ioc_selection < SELECT_IOC_ONE_PER_NODE || + fa->shared_cfg.ioc_selection >= ioc_selection_options) + H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid IOC selection method"); done: H5_SUBFILING_FUNC_LEAVE; @@ -724,8 +678,6 @@ H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr) int ret_value = 0; H5P_genplist_t *plist_ptr = NULL; - H5FD_SUBFILING_LOG_CALL(__func__); - HDassert(id_out_ptr != NULL); if (FALSE == H5P_isa_class(fapl_id, H5P_FILE_ACCESS)) @@ -917,24 +869,22 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma } if (NULL != (file_ptr->file_path = HDrealpath(name, NULL))) { - char *path = NULL; - char *directory = dirname(path); + char *path = NULL; - if (NULL == (path = HDstrdup(file_ptr->file_path))) + if (NULL == (path = H5MM_strdup(file_ptr->file_path))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy subfiling subfile path"); - if (NULL == (file_ptr->file_dir = HDstrdup(directory))) { - HDfree(path); - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, - "can't copy subfiling subfile directory path"); + if (H5_dirname(path, &file_ptr->file_dir) < 0) { + H5MM_free(path); + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't get subfile dirname"); } - HDfree(path); + H5MM_free(path); } else { if (ENOENT == errno) { if (NULL == (file_ptr->file_path = HDstrdup(name))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCOPY, NULL, "can't copy file name"); - if (NULL == (file_ptr->file_dir = HDstrdup("."))) + if (NULL == (file_ptr->file_dir = H5MM_strdup("."))) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "can't set subfile directory path"); } else @@ -1041,21 +991,6 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr) HDassert(file_ptr); -#if H5FD_SUBFILING_DEBUG_OP_CALLS - { - subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id); - - HDassert(sf_context); - HDassert(sf_context->topology); - - if (sf_context->topology->rank_is_ioc) - HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid); - else - HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank); - HDfflush(stdout); - } -#endif - if (file_ptr->sf_file && H5FD_close(file_ptr->sf_file) < 0) H5_SUBFILING_GOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close subfile"); @@ -1081,7 +1016,7 @@ done: HDfree(file_ptr->file_path); file_ptr->file_path = NULL; - HDfree(file_ptr->file_dir); + H5MM_free(file_ptr->file_dir); file_ptr->file_dir = NULL; /* Release the file info */ @@ -1237,87 +1172,18 @@ H5FD__subfiling_set_eoa(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, haddr_t a * Return: End of file address, the first address past the end of the * "file", either the filesystem file or the HDF5 file. * - * SUBFILING NOTE: - * The EOF calculation for subfiling is somewhat different - * than for the more traditional HDF5 file implementations. - * This statement derives from the fact that unlike "normal" - * HDF5 files, subfiling introduces a multi-file representation - * of a single HDF5 file. The plurality of sub-files represents - * a software RAID-0 based HDF5 file. As such, each sub-file - * contains a designated portion of the address space of the - * virtual HDF5 storage. We have no notion of HDF5 datatypes, - * datasets, metadata, or other HDF5 structures; only BYTES. - * - * The organization of the bytes within sub-files is consistent - * with the RAID-0 striping, i.e. there are IO Concentrators - * (IOCs) which correspond to a stripe-count (in Lustre) as - * well as a stripe_size. The combination of these two - * variables determines the "address" (a combination of IOC - * and a file offset) of any storage operation. - * - * Having a defined storage layout, the virtual file EOF - * calculation should be the MAXIMUM value returned by the - * collection of IOCs. Every MPI rank which hosts an IOC - * maintains its own EOF by updating that value for each - * WRITE operation that completes, i.e. if a new local EOF - * is greater than the existing local EOF, the new EOF - * will replace the old. The local EOF calculation is as - * follows. - * 1. At file creation, each IOC is assigned a rank value - * (0 to N-1, where N is the total number of IOCs) and - * a 'sf_base_addr' = 'subfile_rank' * 'sf_stripe_size') - * we also determine the 'sf_blocksize_per_stripe' which - * is simply the 'sf_stripe_size' * 'n_ioc_concentrators' - * - * 2. For every write operation, the IOC receives a message - * containing a file_offset and the data_size. - * - * 3. The file_offset + data_size are in turn used to - * create a stripe_id: - * IOC-(ioc_rank) IOC-(ioc_rank+1) - * |<- sf_base_address |<- sf_base_address | - * ID +--------------------+--------------------+ - * 0:|<- sf_stripe_size ->|<- sf_stripe_size ->| - * 1:|<- sf_stripe_size ->|<- sf_stripe_size ->| - * ~ ~ ~ - * N:|<- sf_stripe_size ->|<- sf_stripe_size ->| - * +--------------------+--------------------+ - * - * The new 'stripe_id' is then used to calculate a - * potential new EOF: - * sf_eof = (stripe_id * sf_blocksize_per_stripe) + sf_base_addr - * + ((file_offset + data_size) % sf_stripe_size) - * - * 4. If (sf_eof > current_sf_eof), then current_sf_eof = sf_eof. - * - * - * Programmer: Richard Warren + * NOTE: This VFD mimics the MPI I/O VFD and so does not try + * to keep the EOF updated. The EOF is mostly just needed + * right after the file is opened so the library can determine + * if the file is empty, truncated or okay. * *------------------------------------------------------------------------- */ static haddr_t H5FD__subfiling_get_eof(const H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type) { - const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file; -#if 0 - int64_t logical_eof = -1; -#endif - haddr_t ret_value = HADDR_UNDEF; - -#if 0 - /* - * TODO: this is a heavy weight implementation. We need something like this - * for file open, and probably for file close. However, in between, something - * similar to the current solution in the MPIIO VFD might be more appropriate. - */ - if (H5FD__subfiling__get_real_eof(file->fa.context_id, &logical_eof) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_INTERNAL, H5E_CANTGET, HADDR_UNDEF, "can't get EOF") - - /* Return the global max of all the subfile EOF values */ - ret_value = (haddr_t)(logical_eof); - -done: -#endif + const H5FD_subfiling_t *file = (const H5FD_subfiling_t *)_file; + haddr_t ret_value = HADDR_UNDEF; ret_value = file->eof; @@ -1390,8 +1256,7 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size); - /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) - */ + /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */ { H5FD_mpio_xfer_t xfer_mode; @@ -1419,11 +1284,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT); } -#if H5FD_SUBFILING_DEBUG_OP_CALLS - HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size); - HDfflush(stdout); -#endif - /* * Retrieve the subfiling context object and the number * of I/O concentrators. @@ -1442,14 +1302,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr ioc_total = sf_context->topology->n_io_concentrators; -#if H5FD_SUBFILING_DEBUG_OP_CALLS - if (sf_context->topology->rank_is_ioc) - HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid); - else - HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank); - HDfflush(stdout); -#endif - if (ioc_total == 0) { H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)", ioc_total); @@ -1539,18 +1391,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate subfile I/O buffers vector"); - /* TODO: The following is left for future work */ - /* - * Set ASYNC MODE - * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file; - * uint64_t op_code_begin = OPC_BEGIN; - * uint64_t op_code_complete = OPC_COMPLETE; - * const void *input = NULL; - * void *output = NULL; - * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output); - * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output); - */ - for (int64_t i = 0; i < max_io_req_per_ioc; i++) { uint32_t final_vec_len = vector_len; int next_ioc = ioc_start; @@ -1588,9 +1428,6 @@ H5FD__subfiling_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr if (MPI_SUCCESS != MPI_Bcast(buf, (int)size, MPI_BYTE, 0, file_ptr->comm)) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "can't broadcast data from rank 0"); } - - /* TODO: The following is left for future work */ - /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */ } } @@ -1658,8 +1495,7 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow, addr = %" PRIuHADDR ", size = %" PRIuHADDR, addr, size); - /* TODO: Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) - */ + /* Temporarily reject collective I/O until support is implemented (unless types are simple MPI_BYTE) */ { H5FD_mpio_xfer_t xfer_mode; @@ -1684,11 +1520,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT); } -#if H5FD_SUBFILING_DEBUG_OP_CALLS - HDprintf("[%s %d] addr=%ld, size=%ld\n", __func__, file_ptr->mpi_rank, addr, size); - HDfflush(stdout); -#endif - /* * Retrieve the subfiling context object and the number * of I/O concentrators. @@ -1707,14 +1538,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add ioc_total = sf_context->topology->n_io_concentrators; -#if H5FD_SUBFILING_DEBUG_OP_CALLS - if (sf_context->topology->rank_is_ioc) - HDprintf("[%s %d] fd=%d\n", __func__, file_ptr->mpi_rank, sf_context->sf_fid); - else - HDprintf("[%s %d] fd=*\n", __func__, file_ptr->mpi_rank); - HDfflush(stdout); -#endif - if (ioc_total == 0) { H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid number of I/O concentrators (%d)", ioc_total); @@ -1804,18 +1627,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't allocate subfile I/O buffers vector"); - /* TODO: The following is left for future work */ - /* - * Set ASYNC MODE - * H5FD_class_aio_t *async_file_ptr = (H5FD_class_aio_t *)file_ptr->sf_file; - * uint64_t op_code_begin = OPC_BEGIN; - * uint64_t op_code_complete = OPC_COMPLETE; - * const void *input = NULL; - * void *output = NULL; - * H5FDctl(file_ptr->sf_file, op_code_begin, flags, input, &output); - * (*async_file_ptr->h5fdctl)(file_ptr->sf_file, op_code_begin, flags, input, &output); - */ - for (int64_t i = 0; i < max_io_req_per_ioc; i++) { uint32_t final_vec_len = vector_len; int next_ioc = ioc_start; @@ -1845,9 +1656,6 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add io_bufs) < 0) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_WRITEERROR, FAIL, "write to subfile failed"); } - - /* TODO: The following is left for future work */ - /* H5FDctl(file_ptr->sf_file, op_code_complete, flags, input, &output); */ } } @@ -1858,15 +1666,11 @@ H5FD__subfiling_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t add file_ptr->pos = addr; file_ptr->op = OP_WRITE; -#if 1 /* Mimic the MPI I/O VFD */ + /* Mimic the MPI I/O VFD */ file_ptr->eof = HADDR_UNDEF; if (file_ptr->pos > file_ptr->local_eof) file_ptr->local_eof = file_ptr->pos; -#else - if (file_ptr->pos > file_ptr->eof) - file_ptr->eof = file_ptr->pos; -#endif done: HDfree(io_bufs); @@ -2235,7 +2039,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5 HDassert(file); /* Extend the file to make sure it's large enough */ -#if 1 /* Mimic the MPI I/O VFD */ if (!H5F_addr_eq(file->eoa, file->last_eoa)) { int64_t sf_eof; int64_t eoa; @@ -2274,29 +2077,6 @@ H5FD__subfiling_truncate(H5FD_t *_file, hid_t H5_ATTR_UNUSED dxpl_id, hbool_t H5 /* Update the 'last' eoa value */ file->last_eoa = file->eoa; } -#else - if (!H5F_addr_eq(file->eoa, file->eof)) { - - /* Update the eof value */ - file->eof = file->eoa; - - /* Reset last file I/O information */ - file->pos = HADDR_UNDEF; - file->op = OP_UNKNOWN; - - /* Update the 'last' eoa value */ - file->last_eoa = file->eoa; - } /* end if */ - - /* truncate sub-files */ - /* This is a hack. We should be doing the truncate of the sub-files via calls to - * H5FD_truncate() with the IOC. However, that system is messed up at present. - * thus the following hack. - * JRM -- 12/18/21 - */ - if (H5FD__subfiling__truncate_sub_files(file->context_id, file->eof, file->comm) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTUPDATE, FAIL, "sub-file truncate request failed"); -#endif done: H5_SUBFILING_FUNC_LEAVE_API; @@ -2325,7 +2105,6 @@ H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw) HDassert(file); - /* TODO: Consider lock only on IOC ranks for one IOC per subfile case */ if (file->fa.require_ioc) { #ifdef VERBOSE HDputs("Subfiling driver doesn't support file locking"); diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c index b75dd81..d83d8c5 100644 --- a/src/H5FDsubfiling/H5subfiling_common.c +++ b/src/H5FDsubfiling/H5subfiling_common.c @@ -19,30 +19,13 @@ #include "H5subfiling_common.h" #include "H5subfiling_err.h" +#include "H5MMprivate.h" + typedef struct { /* Format of a context map entry */ void *file_handle; /* key value (linear search of the cache) */ int64_t sf_context_id; /* The return value if matching file_handle */ } file_map_to_context_t; -typedef struct stat_record { - int64_t op_count; /* How many ops in total */ - double min; /* minimum (time) */ - double max; /* maximum (time) */ - double total; /* average (time) */ -} stat_record_t; - -/* Stat (OP) Categories */ -typedef enum stat_category { - WRITE_STAT = 0, - WRITE_WAIT, - READ_STAT, - READ_WAIT, - FOPEN_STAT, - FCLOSE_STAT, - QUEUE_STAT, - TOTAL_STAT_COUNT -} stat_category_t; - /* Identifiers for HDF5's error API */ hid_t H5subfiling_err_stack_g = H5I_INVALID_HID; hid_t H5subfiling_err_class_g = H5I_INVALID_HID; @@ -55,31 +38,10 @@ static sf_topology_t *sf_topology_cache = NULL; static size_t sf_context_cache_limit = 16; static size_t sf_topology_cache_limit = 4; -app_layout_t *sf_app_layout = NULL; - static file_map_to_context_t *sf_open_file_map = NULL; static int sf_file_map_size = 0; #define DEFAULT_FILE_MAP_ENTRIES 8 -/* Definitions for recording subfiling statistics */ -static stat_record_t subfiling_stats[TOTAL_STAT_COUNT]; -#define SF_WRITE_OPS (subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_TIME (subfiling_stats[WRITE_STAT].total / (double)subfiling_stats[WRITE_STAT].op_count) -#define SF_WRITE_WAIT_TIME (subfiling_stats[WRITE_WAIT].total / (double)subfiling_stats[WRITE_WAIT].op_count) -#define SF_READ_OPS (subfiling_stats[READ_STAT].op_count) -#define SF_READ_TIME (subfiling_stats[READ_STAT].total / (double)subfiling_stats[READ_STAT].op_count) -#define SF_READ_WAIT_TIME (subfiling_stats[READ_WAIT].total / (double)subfiling_stats[READ_WAIT].op_count) -#define SF_QUEUE_DELAYS (subfiling_stats[QUEUE_STAT].total) - -int sf_verbose_flag = 0; - -#ifdef H5_SUBFILING_DEBUG -char sf_logile_name[PATH_MAX]; -FILE *sf_logfile = NULL; - -static int sf_open_file_count = 0; -#endif - static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context); static herr_t H5_free_subfiling_topology(sf_topology_t *topology); @@ -92,7 +54,7 @@ static herr_t init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, MPI_Comm file_comm); static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags); static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index); -static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags); +static herr_t ioc_open_file(int64_t file_context_id, int file_acc_flags); static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out, size_t filename_out_len, char **filename_basename_out, char **subfile_dir_out); @@ -101,8 +63,6 @@ static herr_t create_config_file(subfiling_context_t *sf_context, const char *ba static herr_t open_config_file(subfiling_context_t *sf_context, const char *base_filename, const char *subfile_dir, const char *mode, FILE **config_file_out); -static void initialize_statistics(void); -static int numDigits(int n); static int get_next_fid_map_index(void); static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id); static int compare_hostid(const void *h1, const void *h2); @@ -113,79 +73,6 @@ static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm); static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node); static inline void assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple); -static void -initialize_statistics(void) -{ - HDmemset(subfiling_stats, 0, sizeof(subfiling_stats)); -} - -static int -numDigits(int n) -{ - if (n < 0) - n = (n == INT_MIN) ? INT_MAX : -n; - if (n < 10) - return 1; - if (n < 100) - return 2; - if (n < 1000) - return 3; - if (n < 10000) - return 4; - if (n < 100000) - return 5; - if (n < 1000000) - return 6; - if (n < 10000000) - return 7; - if (n < 100000000) - return 8; - if (n < 1000000000) - return 9; - return 10; -} - -/*------------------------------------------------------------------------- - * Function: set_verbose_flag - * - * Purpose: For debugging purposes, I allow a verbose setting to - * have printing of relevant information into an IOC specific - * file that is opened as a result of enabling the flag - * and closed when the verbose setting is disabled. - * - * Return: None - * Errors: None - * - * Programmer: Richard Warren - * - * Changes: Initial Version/None. - *------------------------------------------------------------------------- - */ -void -set_verbose_flag(int subfile_rank, int new_value) -{ -#ifdef H5_SUBFILING_DEBUG - sf_verbose_flag = (int)(new_value & 0x0FF); - if (sf_verbose_flag) { - char logname[64]; - HDsnprintf(logname, sizeof(logname), "ioc_%d.log", subfile_rank); - if (sf_open_file_count > 1) - sf_logfile = fopen(logname, "a+"); - else - sf_logfile = fopen(logname, "w+"); - } - else if (sf_logfile) { - fclose(sf_logfile); - sf_logfile = NULL; - } -#else - (void)subfile_rank; - (void)new_value; -#endif - - return; -} - static int get_next_fid_map_index(void) { @@ -300,8 +187,9 @@ compare_hostid(const void *h1, const void *h2) static herr_t get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str) { - char *opt_value = NULL; - char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA); + char *opt_value = NULL; + char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA); + herr_t ret_value = SUCCEED; HDassert(ioc_selection_type); HDassert(ioc_sel_info_str); @@ -323,31 +211,24 @@ get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_t errno = 0; check_value = HDstrtol(env_value, NULL, 0); - if (errno == ERANGE) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA - " environment variable\n", - __func__); -#endif - - return FAIL; - } + if (errno == ERANGE) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA + " environment variable"); - if ((check_value < 0) || (check_value >= ioc_selection_options)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA - " environment variable\n", - __func__, check_value); -#endif - - return FAIL; - } + if ((check_value < 0) || (check_value >= ioc_selection_options)) + H5_SUBFILING_GOTO_ERROR( + H5E_VFL, H5E_BADVALUE, FAIL, + "invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA + " environment variable", + check_value); *ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value; *ioc_sel_info_str = opt_value; } - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -379,6 +260,7 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) int hostid_index = -1; int my_rank; int mpi_code; + int ret_value = 0; HDassert(info); HDassert(info->app_layout); @@ -386,20 +268,12 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) HDassert(info->app_layout->node_ranks); HDassert(MPI_COMM_NULL != comm); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif - - return -1; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &my_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(-1, "MPI_Comm_rank failed", mpi_code); app_layout = info->app_layout; node_count = app_layout->node_count; - if (node_count == 0) - gather_topology_info(info, comm); - nextid = app_layout->layout[0].hostid; /* Possibly record my hostid_index */ if (app_layout->layout[0].rank == my_rank) { @@ -428,7 +302,10 @@ count_nodes(sf_topology_t *info, MPI_Comm comm) app_layout->node_count = node_count; - return node_count; + ret_value = node_count; + +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -455,6 +332,7 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm) long hostid; int sf_world_size; int sf_world_rank; + herr_t ret_value = SUCCEED; HDassert(info); HDassert(info->app_layout); @@ -477,18 +355,14 @@ gather_topology_info(sf_topology_t *info, MPI_Comm comm) int mpi_code; if (MPI_SUCCESS != - (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Allgather failed with rc %d\n", __func__, mpi_code); -#endif + (mpi_code = MPI_Allgather(&my_hostinfo, 2, MPI_LONG, app_layout->layout, 2, MPI_LONG, comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code); - return FAIL; - } - - qsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid); + HDqsort(app_layout->layout, (size_t)sf_world_size, sizeof(layout_t), compare_hostid); } - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -567,8 +441,10 @@ assign_ioc_ranks(sf_topology_t *app_topology, int ioc_count, int rank_multiple) for (int k = 0, ioc_next = 0; ioc_next < ioc_count; ioc_next++) { ioc_index = rank_multiple * k++; io_concentrators[ioc_next] = (int)(app_layout->layout[ioc_index].rank); - if (io_concentrators[ioc_next] == app_layout->world_rank) - app_topology->rank_is_ioc = TRUE; + if (io_concentrators[ioc_next] == app_layout->world_rank) { + app_topology->subfile_rank = ioc_next; + app_topology->rank_is_ioc = TRUE; + } } app_topology->n_io_concentrators = ioc_count; } @@ -610,13 +486,6 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val) * open at a time, then only a single subfiling context cache * entry will be used. * - * Topologies are static, e.g. for any one I/O concentrator - * allocation strategy, the results should always be the same. - * - * TODO: The one exception to this being the 1 IOC per N MPI - * ranks strategy. The value of N can be changed on a per-file - * basis, so we need to address that at some point. - * * Return: Pointer to underlying subfiling object if subfiling object * ID is valid * @@ -636,14 +505,11 @@ H5_get_subfiling_object(int64_t object_id) { int64_t obj_type = (object_id >> 32) & 0x0FFFF; int64_t obj_index = object_id & 0x0FFFF; + void *ret_value = NULL; - if (obj_index < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid object index for subfiling object ID %" PRId64 "\n", __func__, object_id); -#endif - - return NULL; - } + if (obj_index < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, + "invalid object index for subfiling object ID %" PRId64, object_id); if (obj_type == SF_CONTEXT) { /* Contexts provide information principally about @@ -658,13 +524,9 @@ H5_get_subfiling_object(int64_t object_id) /* Create subfiling context cache if it doesn't exist */ if (!sf_context_cache) { - if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__); -#endif - - return NULL; - } + if (NULL == (sf_context_cache = HDcalloc(sf_context_cache_limit, sizeof(subfiling_context_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling context cache"); } /* Make more space in context cache if needed */ @@ -677,13 +539,9 @@ H5_get_subfiling_object(int64_t object_id) sf_context_cache_limit *= 2; if (NULL == (tmp_realloc = HDrealloc(sf_context_cache, - sf_context_cache_limit * sizeof(subfiling_context_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling context cache\n", __func__); -#endif - - return NULL; - } + sf_context_cache_limit * sizeof(subfiling_context_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling context cache"); sf_context_cache = tmp_realloc; @@ -698,13 +556,9 @@ H5_get_subfiling_object(int64_t object_id) else if (obj_type == SF_TOPOLOGY) { /* Create subfiling topology cache if it doesn't exist */ if (!sf_topology_cache) { - if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling topology cache\n", __func__); -#endif - - return NULL; - } + if (NULL == (sf_topology_cache = HDcalloc(sf_topology_cache_limit, sizeof(sf_topology_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, + "couldn't allocate space for subfiling topology cache"); } /* We will likely only cache a single topology @@ -712,13 +566,9 @@ H5_get_subfiling_object(int64_t object_id) * In that context, we will identify the number of * nodes along with the number of MPI ranks on a node. */ - if ((size_t)obj_index >= sf_topology_cache_limit) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid object index for subfiling topology object ID\n", __func__); -#endif - - return NULL; - } + if ((size_t)obj_index >= sf_topology_cache_limit) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, + "invalid object index for subfiling topology object ID"); /* Return direct pointer to the topology cache entry */ return (void *)&sf_topology_cache[obj_index]; @@ -728,7 +578,8 @@ H5_get_subfiling_object(int64_t object_id) HDprintf("%s: Unknown subfiling object type for ID %" PRId64 "\n", __func__, object_id); #endif - return NULL; +done: + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -746,23 +597,21 @@ H5_free_subfiling_object(int64_t object_id) { subfiling_context_t *sf_context = NULL; int64_t obj_type = (object_id >> 32) & 0x0FFFF; + herr_t ret_value = SUCCEED; - if (obj_type != SF_CONTEXT) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid subfiling object type for ID %" PRId64 "\n", __func__, object_id); -#endif - - return FAIL; - } + if (obj_type != SF_CONTEXT) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid subfiling object type for ID %" PRId64, + object_id); - sf_context = H5_get_subfiling_object(object_id); - if (!sf_context) - return FAIL; + if (NULL == (sf_context = H5_get_subfiling_object(object_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get subfiling context for subfiling object ID"); if (H5_free_subfiling_object_int(sf_context) < 0) - return FAIL; + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); - return SUCCEED; +done: + H5_SUBFILING_FUNC_LEAVE; } static herr_t @@ -858,15 +707,7 @@ H5_free_subfiling_topology(sf_topology_t *topology) HDfree(topology->subfile_fd); topology->subfile_fd = NULL; - /* - * The below assumes that the subfiling application layout - * is retrieved once and used for subsequent file opens for - * the duration that the Subfiling VFD is in use - */ - HDassert(topology->app_layout == sf_app_layout); - -#if 0 - if (topology->app_layout && (topology->app_layout != sf_app_layout)) { + if (topology->app_layout) { HDfree(topology->app_layout->layout); topology->app_layout->layout = NULL; @@ -875,7 +716,6 @@ H5_free_subfiling_topology(sf_topology_t *topology) HDfree(topology->app_layout); } -#endif topology->app_layout = NULL; @@ -927,112 +767,55 @@ H5_open_subfiles(const char *base_filename, void *file_handle, int mpi_code; herr_t ret_value = SUCCEED; - if (!base_filename) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (!subfiling_config) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid subfiling configuration pointer\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (!context_id_out) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: context_id_out is NULL\n", __func__); -#endif + if (!base_filename) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling base filename"); - ret_value = FAIL; - goto done; - } + if (!subfiling_config) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling configuration"); - initialize_statistics(); - -#if 0 /* TODO */ - /* Maybe set the verbose flag for more debugging info */ - envValue = HDgetenv("H5_SF_VERBOSE_FLAG"); - if (envValue != NULL) { - int check_value = atoi(envValue); - if (check_value > 0) - sf_verbose_flag = 1; - } -#endif + if (!context_id_out) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer"); /* Initialize new subfiling context ID based on configuration information */ - if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize subfiling context\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context"); /* Retrieve the subfiling object for the newly-created context ID */ - if (NULL == (sf_context = H5_get_subfiling_object(context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID"); /* Save some basic things in the new subfiling context */ sf_context->h5_file_handle = file_handle; - if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy base HDF5 filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling filename"); /* * If we're actually using the IOCs, we will * start the service threads on the identified * ranks as part of the subfile opening. */ - if (open_subfile_with_context(sf_context, file_acc_flags) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't open subfiles\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (open_subfile_with_context(sf_context, file_acc_flags) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles"); #ifdef H5_SUBFILING_DEBUG { struct tm *tm = NULL; time_t cur_time; int mpi_rank; + int mpi_code; /* Open debugging logfile */ - if (MPI_SUCCESS != MPI_Comm_rank(file_comm, &mpi_rank)) { - HDprintf("%s: couldn't get MPI rank\n", __func__); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &mpi_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); HDsnprintf(sf_context->sf_logfile_name, PATH_MAX, "%s.log.%d", sf_context->h5_filename, mpi_rank); - if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a"))) { - HDprintf("%s: couldn't open subfiling debug logfile\n", __func__); - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->sf_logfile = HDfopen(sf_context->sf_logfile_name, "a"))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling debug logfile"); cur_time = time(NULL); tm = localtime(&cur_time); @@ -1052,38 +835,24 @@ done: * Form consensus on whether opening subfiles was * successful */ - if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: MPI_Allreduce failed with rc %d\n", __func__, - sf_context->topology->app_layout->world_rank, mpi_code); -#endif - - ret_value = FAIL; - } + if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&l_errors, &g_errors, 1, MPI_INT, MPI_SUM, file_comm))) + H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Allreduce failed", mpi_code); if (g_errors > 0) { -#ifdef H5_SUBFILING_DEBUG - if (sf_context->topology->app_layout->world_rank == 0) { - HDprintf("%s: one or more IOC ranks couldn't open subfiles\n", __func__); - } -#endif - - ret_value = FAIL; + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "one or more IOC ranks couldn't open subfiles"); } if (ret_value < 0) { clear_fid_map_entry(file_handle, context_id); - if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling object\n", __func__); -#endif - } + if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); *context_id_out = -1; } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /* @@ -1120,48 +889,25 @@ init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, HDassert(file_index >= 0); /* Use the file's index to create a new subfiling context ID */ - if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if ((context_id = H5_new_subfiling_object_id(SF_CONTEXT, file_index)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling context ID"); /* Create a new subfiling context object with the created context ID */ - if (NULL == (new_context = H5_get_subfiling_object(context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (new_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't create new subfiling object"); /* * Setup the application topology information, including the computed * number and distribution map of the set of I/O concentrators */ - if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize application topology\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize application topology"); new_context->sf_context_id = context_id; - if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't initialize subfiling topology object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "couldn't initialize subfiling application topology object"); new_context->sf_base_addr = 0; if (new_context->topology->rank_is_ioc) { @@ -1175,14 +921,11 @@ done: if (ret_value < 0) { HDfree(app_topology); - if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling object\n", __func__); -#endif - } + if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1224,14 +967,13 @@ static herr_t init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out) { - sf_topology_t *app_topology = NULL; - app_layout_t *app_layout = sf_app_layout; - char *env_value = NULL; - char *ioc_sel_str = NULL; - int *io_concentrators = NULL; - long ioc_select_val = -1; - long iocs_per_node = 1; - int ioc_count = 0; + sf_topology_t *app_topology = NULL; + app_layout_t *app_layout = NULL; + char *env_value = NULL; + char *ioc_sel_str = NULL; + long ioc_select_val = -1; + long iocs_per_node = 1; + int ioc_count = 0; int comm_rank; int comm_size; int mpi_code; @@ -1241,33 +983,16 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, HDassert(app_topology_out); HDassert(!*app_topology_out); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &comm_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator size; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(comm, &comm_size))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code); /* Check if an IOC selection type was specified by environment variable */ - if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get IOC selection type from environment\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (get_ioc_selection_criteria_from_env(&ioc_selection_type, &ioc_sel_str) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get IOC selection type from environment"); /* Sanity checking on different IOC selection strategies */ switch (ioc_selection_type) { @@ -1318,67 +1043,29 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, } /* Allocate new application topology information object */ - if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create new subfiling topology object\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (app_topology = HDcalloc(1, sizeof(*app_topology)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't create new subfiling topology object"); app_topology->subfile_rank = -1; app_topology->selection_type = ioc_selection_type; - if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate array of I/O concentrator ranks\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - io_concentrators = app_topology->io_concentrators; - HDassert(io_concentrators); + if (NULL == (app_topology->io_concentrators = HDcalloc((size_t)comm_size, sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate array of I/O concentrator ranks"); if (!app_layout) { - /* TODO: this is dangerous if a new comm size is greater than what - * was allocated. Can't reuse app layout. - */ + if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout structure"); - if (NULL == (app_layout = HDcalloc(1, sizeof(*app_layout)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout structure\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout node rank array\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate application layout array\n", __func__); -#endif + if (NULL == (app_layout->node_ranks = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout node rank array"); - ret_value = FAIL; - goto done; - } - - /* - * Once the application layout has been filled once, any additional - * file open operations won't be required to gather that information. - */ - sf_app_layout = app_layout; + if (NULL == (app_layout->layout = HDcalloc(1, ((size_t)comm_size + 1) * sizeof(layout_t)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate application layout array"); } app_layout->world_size = comm_size; @@ -1386,6 +1073,8 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, app_topology->app_layout = app_layout; + gather_topology_info(app_topology, comm); + /* * Determine which ranks are I/O concentrator ranks, based on the * given IOC selection strategy and MPI information. @@ -1396,17 +1085,11 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, app_topology->selection_type = SELECT_IOC_ONE_PER_NODE; - if ((node_count = count_nodes(app_topology, comm)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't determine number of nodes used\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if ((node_count = count_nodes(app_topology, comm)) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't determine number of nodes used"); /* Check for an IOC-per-node value set in the environment */ - /* TODO: should this env. var. be interpreted for other selection types? */ if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) { errno = 0; ioc_select_val = HDstrtol(env_value, NULL, 0); @@ -1465,11 +1148,7 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, case SELECT_IOC_WITH_CONFIG: default: -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid IOC selection strategy\n", __func__); -#endif - ret_value = FAIL; - goto done; + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "invalid IOC selection strategy"); break; } @@ -1480,14 +1159,9 @@ init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm, * Create a vector of "potential" file descriptors * which can be indexed by the IOC ID */ - if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate subfile file descriptor array\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (app_topology->subfile_fd = HDcalloc((size_t)ioc_count, sizeof(int)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate subfile file descriptor array"); *app_topology_out = app_topology; @@ -1505,7 +1179,7 @@ done: } } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1576,15 +1250,9 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co errno = 0; stripe_size = HDstrtoll(env_value, NULL, 0); - if (ERANGE == errno) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__, - env_value); -#endif - - ret_value = FAIL; - goto done; - } + if (ERANGE == errno) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, + "invalid stripe size setting for " H5FD_SUBFILING_STRIPE_SIZE); if (stripe_size > 0) { sf_context->sf_stripe_size = (int64_t)stripe_size; @@ -1599,14 +1267,8 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co /* Check for a subfile name prefix setting in the environment */ if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) { - if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix value\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix value"); } /* @@ -1614,124 +1276,44 @@ init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_co * to/from IOC ranks */ - if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI communicator rank; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC messages; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC message sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC data; rc = %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC data sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(file_comm, &comm_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC EOF; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_msg_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_msg_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on IOC EOF sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_data_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_data_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for barriers; rc = %d\n", __func__, mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_eof_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_eof_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); - if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't set MPI error handler on barrier sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_dup(file_comm, &sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_dup failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_set_errhandler(sf_context->sf_barrier_comm, MPI_ERRORS_RETURN))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_set_errhandler failed", mpi_code); /* Create an MPI sub-communicator for IOC ranks */ if (app_topology->n_io_concentrators > 1) { if (MPI_SUCCESS != (mpi_code = MPI_Comm_split(file_comm, app_topology->rank_is_ioc, comm_rank, - &sf_context->sf_group_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't create sub-communicator for IOC ranks; rc = %d\n", __func__, mpi_code); -#endif + &sf_context->sf_group_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_split failed", mpi_code); - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != - (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI rank from IOC rank sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif - - ret_value = FAIL; - goto done; - } - - if (MPI_SUCCESS != - (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get MPI comm size from IOC rank sub-communicator; rc = %d\n", __func__, - mpi_code); -#endif + if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(sf_context->sf_group_comm, &sf_context->sf_group_rank))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code); - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Comm_size(sf_context->sf_group_comm, &sf_context->sf_group_size))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_size failed", mpi_code); } done: @@ -1739,7 +1321,7 @@ done: H5_free_subfiling_object_int(sf_context); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1779,68 +1361,35 @@ done: static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) { - double start_time; herr_t ret_value = SUCCEED; HDassert(sf_context); - start_time = MPI_Wtime(); - /* * Save the HDF5 file ID (fid) to subfile context mapping. * There shouldn't be any issue, but check the status and * return if there was a problem. */ - if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "couldn't record HDF5 file ID to subfile context mapping"); /* * If this rank is an I/O concentrator, actually open * the subfile belonging to this IOC rank */ if (sf_context->topology->rank_is_ioc) { - sf_work_request_t msg = {{file_acc_flags, (int64_t)sf_context->h5_file_id, sf_context->sf_context_id}, - OPEN_OP, - sf_context->topology->app_layout->world_rank, - sf_context->topology->subfile_rank, - sf_context->sf_context_id, - start_time, - NULL, - 0, - 0, - 0, - 0}; - h5_stat_t st; + h5_stat_t st; /* Retrieve Inode value for HDF5 stub file */ - if (HDstat(sf_context->h5_filename, &st) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: couldn't stat file %s\n", __func__, - sf_context->topology->app_layout->world_rank, sf_context->h5_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (HDstat(sf_context->h5_filename, &st) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't stat HDF5 stub file"); HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t)); sf_context->h5_file_id = (uint64_t)st.st_ino; - if (ioc_open_file(&msg, file_acc_flags) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("[%s %d]: couldn't open subfile\n", __func__, - sf_context->topology->app_layout->world_rank); -#endif - - ret_value = FAIL; - goto done; - } + if (ioc_open_file(sf_context->sf_context_id, file_acc_flags) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile"); } done: @@ -1848,7 +1397,7 @@ done: clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id); } - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1889,14 +1438,8 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i if (sf_file_map_size == 0) { if (NULL == - (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate open file map\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + (sf_open_file_map = HDmalloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping"); sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES; for (int i = 0; i < sf_file_map_size; i++) { @@ -1925,14 +1468,9 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i void *tmp_realloc; if (NULL == (tmp_realloc = HDrealloc(sf_open_file_map, - ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map))))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't reallocate open file map\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + ((size_t)(sf_file_map_size * 2) * sizeof(*sf_open_file_map))))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't reallocate open file mapping"); sf_open_file_map = tmp_realloc; sf_file_map_size *= 2; @@ -1950,7 +1488,7 @@ record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_i } done: - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -1992,10 +1530,9 @@ done: *------------------------------------------------------------------------- */ static herr_t -ioc_open_file(sf_work_request_t *msg, int file_acc_flags) +ioc_open_file(int64_t file_context_id, int file_acc_flags) { - subfiling_context_t *sf_context = NULL; - int64_t file_context_id; + subfiling_context_t *sf_context = NULL; mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; char *filepath = NULL; char *subfile_dir = NULL; @@ -2003,51 +1540,24 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags) int fd = -1; herr_t ret_value = SUCCEED; - HDassert(msg); - - /* Retrieve subfiling context ID from RPC message */ - file_context_id = msg->header[2]; - - if (NULL == (sf_context = H5_get_subfiling_object(file_context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context = H5_get_subfiling_object(file_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "couldn't get subfiling object from context ID"); /* Only IOC ranks should be here */ HDassert(sf_context->topology); HDassert(sf_context->topology->subfile_rank >= 0); - if (NULL == (filepath = HDcalloc(1, PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (filepath = HDcalloc(1, PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfile filename"); /* Generate the name of the subfile that this IOC rank will open */ - if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, &subfile_dir) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't generate name for subfile\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (generate_subfile_name(sf_context, file_acc_flags, filepath, PATH_MAX, &base, &subfile_dir) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "couldn't generate name for subfile"); - if (NULL == (sf_context->sf_filename = HDstrdup(filepath))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile name\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (sf_context->sf_filename = HDstrdup(filepath))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile name"); /* Attempt to create/open the subfile for this IOC rank */ if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0) @@ -2080,11 +1590,11 @@ done: } } - HDfree(base); - HDfree(subfile_dir); + H5MM_free(base); + H5MM_free(subfile_dir); HDfree(filepath); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /* @@ -2134,28 +1644,16 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char */ n_io_concentrators = sf_context->topology->n_io_concentrators; - if (NULL == (prefix = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (prefix = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfile prefix"); /* Under normal operation, we co-locate subfiles with the HDF5 file */ - HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX); + HDstrncpy(prefix, sf_context->h5_filename, PATH_MAX - 1); + prefix[PATH_MAX - 1] = '\0'; - base = basename(prefix); - - if (NULL == (*filename_basename_out = HDstrdup(base))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfile basename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (H5_basename(prefix, &base) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't get subfile basename"); if (sf_context->subfile_prefix) { /* Note: Users may specify a directory name which is inaccessible @@ -2165,28 +1663,12 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * if so, we could default to creating the subfiles in the * current directory. (?) */ - if (NULL == (*subfile_dir_out = HDstrdup(sf_context->subfile_prefix))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - subfile_dir = *subfile_dir_out; + if (NULL == (subfile_dir = H5MM_strdup(sf_context->subfile_prefix))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't copy subfile prefix"); } else { - subfile_dir = dirname(prefix); - - if (NULL == (*subfile_dir_out = HDstrdup(subfile_dir))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't copy subfile prefix\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (H5_dirname(prefix, &subfile_dir) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't get subfile prefix"); } /* @@ -2194,14 +1676,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * we aren't truncating the file. */ if (0 == (file_acc_flags & O_TRUNC)) { - if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't open existing subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (open_config_file(sf_context, base, subfile_dir, "r", &config_file) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "couldn't open existing subfiling configuration file"); } /* @@ -2210,14 +1687,9 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * in order to generate the correct subfile names. */ if (config_file) { - if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't read from subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, + "couldn't read from subfiling configuration file"); } /* @@ -2231,34 +1703,36 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char * and the configuration file will be named: * ABC.h5.subfile_<file-number>.config */ - num_digits = numDigits(n_io_concentrators); + num_digits = (int)(HDlog10(n_io_concentrators) + 1); HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base, sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1, n_io_concentrators); -done: - if (config_file && (EOF == HDfclose(config_file))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif + *filename_basename_out = base; + *subfile_dir_out = subfile_dir; - ret_value = FAIL; - } +done: + if (config_file && (EOF == HDfclose(config_file))) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); if (ret_value < 0) { + H5MM_free(subfile_dir); + H5MM_free(base); + if (*filename_basename_out) { - HDfree(*filename_basename_out); + H5MM_free(*filename_basename_out); *filename_basename_out = NULL; } if (*subfile_dir_out) { - HDfree(*subfile_dir_out); + H5MM_free(*subfile_dir_out); *subfile_dir_out = NULL; } } HDfree(prefix); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2292,33 +1766,18 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c HDassert(base_filename); HDassert(subfile_dir); - if (sf_context->h5_file_id == UINT64_MAX) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id); -#endif - - ret_value = FAIL; - goto done; - } - if (*base_filename == '\0') { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (sf_context->h5_file_id == UINT64_MAX) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64, + sf_context->h5_file_id); + if (*base_filename == '\0') + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'", + base_filename); if (*subfile_dir == '\0') subfile_dir = "."; - if (NULL == (config_filename = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (config_filename = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling configuration filename"); HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, sf_context->h5_file_id); @@ -2329,14 +1788,9 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c config_file_exists = (ret == 0) || ((ret < 0) && (ENOENT != errno)); - if (config_file_exists && (ret != 0)) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't check existence of configuration file"); -#endif - - ret_value = FAIL; - goto done; - } + if (config_file_exists && (ret != 0)) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't check existence of subfiling configuration file"); /* * If a config file doesn't exist, create one. If a @@ -2349,100 +1803,61 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c int n_io_concentrators = sf_context->topology->n_io_concentrators; int num_digits; - if (NULL == (config_file = HDfopen(config_filename, "w+"))) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't open subfiling configuration file"); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (line_buf = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate buffer for writing to subfiling configuration file\n", __func__); -#endif + if (NULL == (config_file = HDfopen(config_filename, "w+"))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (line_buf = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate buffer for writing to subfiling configuration file"); /* Write the subfiling stripe size to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "stripe_size=%" PRId64 "\n", sf_context->sf_stripe_size); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the number of I/O concentrators to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "aggregator_count=%d\n", n_io_concentrators); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the base HDF5 filename to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "hdf5_file=%s\n", sf_context->h5_filename); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write the optional subfile directory prefix to the configuration file */ HDsnprintf(line_buf, PATH_MAX, "subfile_dir=%s\n", subfile_dir); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); /* Write out each subfile name to the configuration file */ - num_digits = numDigits(n_io_concentrators); + num_digits = (int)(HDlog10(n_io_concentrators) + 1); for (int k = 0; k < n_io_concentrators; k++) { HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename, sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators); - if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fwrite failed to write to subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_WRITEERROR, FAIL, + "failed to write to subfiling configuration file"); } } done: if (config_file) { - if (EOF == HDfclose(config_file)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - } + if (EOF == HDfclose(config_file)) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); } HDfree(line_buf); HDfree(config_filename); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2477,33 +1892,18 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con *config_file_out = NULL; - if (sf_context->h5_file_id == UINT64_MAX) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid HDF5 file ID %" PRIu64 "\n", __func__, sf_context->h5_file_id); -#endif - - ret_value = FAIL; - goto done; - } - if (*base_filename == '\0') { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: invalid base HDF5 filename %s\n", __func__, base_filename); -#endif - - ret_value = FAIL; - goto done; - } + if (sf_context->h5_file_id == UINT64_MAX) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid HDF5 file ID %" PRIu64, + sf_context->h5_file_id); + if (*base_filename == '\0') + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, "invalid base HDF5 filename '%s'", + base_filename); if (*subfile_dir == '\0') subfile_dir = "."; - if (NULL == (config_filename = HDmalloc(PATH_MAX))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for subfiling configuration file filename\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (NULL == (config_filename = HDmalloc(PATH_MAX))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for subfiling configuration filename"); HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename, sf_context->h5_file_id); @@ -2517,40 +1917,26 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con if (!config_file_exists) goto done; - if (config_file_exists && (ret != 0)) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't check existence of configuration file"); -#endif - - ret_value = FAIL; - goto done; - } - - if (NULL == (config_file = HDfopen(config_filename, mode))) { -#ifdef H5_SUBFILING_DEBUG - HDperror("couldn't open subfiling configuration file"); -#endif + if (config_file_exists && (ret != 0)) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't check existence of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (config_file = HDfopen(config_filename, mode))) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, + "couldn't open subfiling configuration file"); *config_file_out = config_file; done: if (ret_value < 0) { - if (config_file && (EOF == HDfclose(config_file))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: fclose failed to close subfiling configuration file\n", __func__); -#endif - - ret_value = FAIL; - } + if (config_file && (EOF == HDfclose(config_file))) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "couldn't close subfiling configuration file"); } HDfree(config_filename); - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2575,79 +1961,42 @@ H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators) HDassert(config_file); HDassert(n_io_concentrators); - if (HDfseek(config_file, 0, SEEK_END) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } - - if ((config_file_len = HDftell(config_file)) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } - - if (HDfseek(config_file, 0, SEEK_SET) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__, - errno); -#endif + if (HDfseek(config_file, 0, SEEK_END) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, + "couldn't seek to end of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if ((config_file_len = HDftell(config_file)) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, + "couldn't get size of subfiling configuration file"); - if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__); -#endif + if (HDfseek(config_file, 0, SEEK_SET) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SEEKERROR, FAIL, + "couldn't seek to beginning of subfiling configuration file"); - ret_value = FAIL; - goto done; - } + if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, + "couldn't allocate space for reading from subfiling configuration file"); - if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno); -#endif - - ret_value = FAIL; - goto done; - } + if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL, + "couldn't read from subfiling configuration file"); config_buf[config_file_len] = '\0'; - if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } - - if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n", - __func__); -#endif + if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADVALUE, FAIL, + "malformed subfiling configuration file - no aggregator count entry"); - ret_value = FAIL; - goto done; - } + if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) + H5_SUBFILING_SYS_GOTO_ERROR( + H5E_FILE, H5E_CANTGET, FAIL, + "couldn't get number of I/O concentrators from subfiling configuration file"); - if (read_n_io_concs <= 0) { - HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n", - __func__, read_n_io_concs); - ret_value = FAIL; - goto done; - } + if (read_n_io_concs <= 0) + H5_SUBFILING_GOTO_ERROR( + H5E_FILE, H5E_BADVALUE, FAIL, + "invalid number of I/O concentrators (%d) read from subfiling configuration file", + read_n_io_concs); *n_io_concentrators = read_n_io_concs; @@ -2703,77 +2052,44 @@ H5_close_subfiles(int64_t subfiling_context_id) { subfiling_context_t *sf_context = NULL; MPI_Request barrier_req = MPI_REQUEST_NULL; -#ifdef H5_SUBFILING_DEBUG - double t0 = 0.0; - double t1 = 0.0; - double t2 = 0.0; -#endif - int mpi_code; - herr_t ret_value = SUCCEED; - -#ifdef H5_SUBFILING_DEBUG - t0 = MPI_Wtime(); -#endif - - if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't get subfiling object from context ID\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + int mpi_code; + herr_t ret_value = SUCCEED; - /* We make the subfile close operation collective. - * Otherwise, there may be a race condition between - * our closing the subfiles and the user application - * moving ahead and possibly re-opening a file. - * - * If we can, we utilize an async barrier which gives - * us the opportunity to reduce the CPU load due to - * MPI spinning while waiting for the barrier to - * complete. This is especially important if there - * is heavy thread utilization due to subfiling - * activities, i.e. the thread pool might be - * extremely busy servicing I/O requests from all - * HDF5 application ranks. - */ + if (NULL == (sf_context = H5_get_subfiling_object(subfiling_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "couldn't get subfiling object from context ID"); + + /* We make the subfile close operation collective. + * Otherwise, there may be a race condition between + * our closing the subfiles and the user application + * moving ahead and possibly re-opening a file. + * + * If we can, we utilize an async barrier which gives + * us the opportunity to reduce the CPU load due to + * MPI spinning while waiting for the barrier to + * complete. This is especially important if there + * is heavy thread utilization due to subfiling + * activities, i.e. the thread pool might be + * extremely busy servicing I/O requests from all + * HDF5 application ranks. + */ #if MPI_VERSION > 3 || (MPI_VERSION == 3 && MPI_SUBVERSION >= 1) { int barrier_complete = 0; - if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code); while (!barrier_complete) { useconds_t t_delay = 5; usleep(t_delay); - if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code); } } #else - if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); #endif /* The map from file handle to subfiling context can now be cleared */ @@ -2784,49 +2100,11 @@ H5_close_subfiles(int64_t subfiling_context_id) if (sf_context->topology->rank_is_ioc) { if (sf_context->sf_fid >= 0) { errno = 0; - if (HDclose(sf_context->sf_fid) < 0) { - HDperror("H5_close_subfiles - couldn't close subfile"); - -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't close subfile\n", __func__); -#endif - - ret_value = FAIL; - goto done; - } + if (HDclose(sf_context->sf_fid) < 0) + H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "couldn't close subfile"); sf_context->sf_fid = -1; } - -#ifdef H5_SUBFILING_DEBUG - /* FIXME: If we've had multiple files open, our statistics - * will be messed up! - */ - if (sf_verbose_flag) { - t1 = t2; - if (sf_logfile != NULL) { - if (SF_WRITE_OPS > 0) - HDfprintf( - sf_logfile, - "[%d] pwrite perf: wrt_ops=%ld wait=%lf pwrite=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_WRITE_OPS, SF_WRITE_WAIT_TIME, SF_WRITE_TIME, - (t1 - t0)); - if (SF_READ_OPS > 0) - HDfprintf(sf_logfile, - "[%d] pread perf: read_ops=%ld wait=%lf pread=%lf IOC_shutdown = %lf seconds\n", - sf_context->sf_group_rank, SF_READ_OPS, SF_READ_WAIT_TIME, SF_READ_TIME, - (t1 - t0)); - - HDfprintf(sf_logfile, "[%d] Avg queue time=%lf seconds\n", sf_context->sf_group_rank, - SF_QUEUE_DELAYS / (double)(SF_WRITE_OPS + SF_READ_OPS)); - - HDfflush(sf_logfile); - - HDfclose(sf_logfile); - sf_logfile = NULL; - } - } -#endif } /* @@ -2838,50 +2116,27 @@ H5_close_subfiles(int64_t subfiling_context_id) { int barrier_complete = 0; - if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Ibarrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(sf_context->sf_barrier_comm, &barrier_req))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code); while (!barrier_complete) { useconds_t t_delay = 5; usleep(t_delay); - if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Test failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Test(&barrier_req, &barrier_complete, MPI_STATUS_IGNORE))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code); } } #else - if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: MPI_Barrier failed with rc %d\n", __func__, mpi_code); -#endif - - ret_value = FAIL; - goto done; - } + if (MPI_SUCCESS != (mpi_code = MPI_Barrier(sf_context->sf_barrier_comm))) + H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); #endif done: - if (sf_context && H5_free_subfiling_object_int(sf_context) < 0) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: couldn't free subfiling context object\n", __func__); -#endif + if (sf_context && H5_free_subfiling_object_int(sf_context) < 0) + H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "couldn't free subfiling context object"); - ret_value = FAIL; - } - - return ret_value; + H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- @@ -2904,13 +2159,10 @@ done: int64_t H5_subfile_fhandle_to_context(void *file_handle) { - if (!sf_open_file_map) { -#ifdef H5_SUBFILING_DEBUG - HDprintf("%s: open file map is invalid\n", __func__); -#endif + int64_t ret_value = -1; - return -1; - } + if (!sf_open_file_map) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL"); for (int i = 0; i < sf_file_map_size; i++) { if (sf_open_file_map[i].file_handle == file_handle) { @@ -2918,7 +2170,8 @@ H5_subfile_fhandle_to_context(void *file_handle) } } - return -1; +done: + H5_SUBFILING_FUNC_LEAVE; } /* end H5_subfile_fhandle_to_context() */ #ifdef H5_SUBFILING_DEBUG diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h index 3195c9d..6e2965f 100644 --- a/src/H5FDsubfiling/H5subfiling_common.h +++ b/src/H5FDsubfiling/H5subfiling_common.h @@ -23,6 +23,7 @@ #include "H5Iprivate.h" #include "H5FDsubfiling.h" +#include "H5FDioc.h" /* * Some definitions for debugging the Subfiling feature @@ -189,25 +190,15 @@ typedef struct { */ typedef struct { /* {Datasize, Offset, FileID} */ - int64_t header[3]; /* The basic RPC input plus */ - int tag; /* the supplied OPCODE tag */ - int source; /* Rank of who sent the message */ - int subfile_rank; /* The IOC rank */ - int64_t context_id; /* context to be used to complete */ - double start_time; /* the request, + time of receipt */ - /* from which we calc Time(queued) */ - void *buffer; /* for writes, we keep the buffer */ - /* around for awhile... */ - volatile int in_progress; /* Not used! */ - volatile int serialize; /* worker thread needs to wait while true */ - volatile int dependents; //* If current work item has dependents */ - int depend_id; /* work queue index of the dependent */ + int64_t header[3]; /* The basic RPC input plus */ + int tag; /* the supplied OPCODE tag */ + int source; /* Rank of who sent the message */ + int subfile_rank; /* The IOC rank */ + int64_t context_id; /* context to be used to complete */ + double start_time; /* the request, + time of receipt */ + /* from which we calc Time(queued) */ } sf_work_request_t; -extern int sf_verbose_flag; - -extern app_layout_t *sf_app_layout; - #ifdef __cplusplus extern "C" { #endif @@ -225,8 +216,6 @@ H5_DLL herr_t H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_con H5_DLL void H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...); -void set_verbose_flag(int subfile_rank, int new_value); - #ifdef __cplusplus } #endif |